From 4b48f2b254d439bccc7c35029628e8042a65daec Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Wed, 8 Jan 2025 15:17:53 +0800 Subject: [PATCH] [skip e2e] enhance: Use template to remove unittest duplication Signed-off-by: Cai Yudong --- internal/core/src/common/VectorTrait.h | 76 +- internal/core/unittest/test_c_api.cpp | 1106 +++-------------- internal/core/unittest/test_float16.cpp | 12 +- internal/core/unittest/test_query.cpp | 2 +- internal/core/unittest/test_utils/DataGen.h | 167 +-- .../unittest/test_utils/c_api_test_utils.h | 29 +- 6 files changed, 265 insertions(+), 1127 deletions(-) diff --git a/internal/core/src/common/VectorTrait.h b/internal/core/src/common/VectorTrait.h index e021558bbde2d..19c112737a670 100644 --- a/internal/core/src/common/VectorTrait.h +++ b/internal/core/src/common/VectorTrait.h @@ -15,43 +15,107 @@ // limitations under the License. #pragma once -#include "Types.h" + #include #include + #include "Array.h" +#include "Types.h" +#include "common/type_c.h" +#include "pb/common.pb.h" +#include "pb/plan.pb.h" +#include "pb/schema.pb.h" namespace milvus { +#define GET_ELEM_TYPE_FOR_VECTOR_TRAIT \ + using elem_type = std::conditional_t< \ + std::is_same_v, \ + BinaryVector::embedded_type, \ + std::conditional_t< \ + std::is_same_v, \ + Float16Vector::embedded_type, \ + std::conditional_t< \ + std::is_same_v, \ + BFloat16Vector::embedded_type, \ + FloatVector::embedded_type>>>; + +#define GET_SCHEMA_DATA_TYPE_FOR_VECTOR_TRAIT \ + auto schema_data_type = \ + std::is_same_v \ + ? FloatVector::schema_data_type \ + : std::is_same_v \ + ? Float16Vector::schema_data_type \ + : std::is_same_v \ + ? BFloat16Vector::schema_data_type \ + : BinaryVector::schema_data_type; + class VectorTrait {}; class FloatVector : public VectorTrait { public: using embedded_type = float; - static constexpr auto metric_type = DataType::VECTOR_FLOAT; + static constexpr int32_t dim_factor = 1; + static constexpr auto data_type = DataType::VECTOR_FLOAT; + static constexpr auto c_data_type = CDataType::FloatVector; + static constexpr auto schema_data_type = + proto::schema::DataType::FloatVector; + static constexpr auto vector_type = proto::plan::VectorType::FloatVector; + static constexpr auto placeholder_type = + proto::common::PlaceholderType::FloatVector; }; class BinaryVector : public VectorTrait { public: using embedded_type = uint8_t; - static constexpr auto metric_type = DataType::VECTOR_BINARY; + static constexpr int32_t dim_factor = 8; + static constexpr auto data_type = DataType::VECTOR_BINARY; + static constexpr auto c_data_type = CDataType::BinaryVector; + static constexpr auto schema_data_type = + proto::schema::DataType::BinaryVector; + static constexpr auto vector_type = proto::plan::VectorType::BinaryVector; + static constexpr auto placeholder_type = + proto::common::PlaceholderType::BinaryVector; }; class Float16Vector : public VectorTrait { public: using embedded_type = float16; - static constexpr auto metric_type = DataType::VECTOR_FLOAT16; + static constexpr int32_t dim_factor = 1; + static constexpr auto data_type = DataType::VECTOR_FLOAT16; + static constexpr auto c_data_type = CDataType::Float16Vector; + static constexpr auto schema_data_type = + proto::schema::DataType::Float16Vector; + static constexpr auto vector_type = proto::plan::VectorType::Float16Vector; + static constexpr auto placeholder_type = + proto::common::PlaceholderType::Float16Vector; }; class BFloat16Vector : public VectorTrait { public: using embedded_type = bfloat16; - static constexpr auto metric_type = DataType::VECTOR_BFLOAT16; + static constexpr int32_t dim_factor = 1; + static constexpr auto data_type = DataType::VECTOR_BFLOAT16; + static constexpr auto c_data_type = CDataType::BFloat16Vector; + static constexpr auto schema_data_type = + proto::schema::DataType::BFloat16Vector; + static constexpr auto vector_type = proto::plan::VectorType::BFloat16Vector; + static constexpr auto placeholder_type = + proto::common::PlaceholderType::BFloat16Vector; }; class SparseFloatVector : public VectorTrait { public: using embedded_type = float; - static constexpr auto metric_type = DataType::VECTOR_SPARSE_FLOAT; + static constexpr int32_t dim_factor = 1; + static constexpr auto data_type = DataType::VECTOR_SPARSE_FLOAT; + static constexpr auto c_data_type = CDataType::SparseFloatVector; + static constexpr auto schema_data_type = + proto::schema::DataType::SparseFloatVector; + static constexpr auto vector_type = + proto::plan::VectorType::SparseFloatVector; + static constexpr auto placeholder_type = + proto::common::PlaceholderType::SparseFloatVector; }; template diff --git a/internal/core/unittest/test_c_api.cpp b/internal/core/unittest/test_c_api.cpp index 61a2d86cde29e..af2e5a7fb78c4 100644 --- a/internal/core/unittest/test_c_api.cpp +++ b/internal/core/unittest/test_c_api.cpp @@ -26,6 +26,7 @@ #include "common/LoadInfo.h" #include "common/Types.h" #include "common/type_c.h" +#include "common/VectorTrait.h" #include "index/IndexFactory.h" #include "knowhere/comp/index_param.h" #include "pb/plan.pb.h" @@ -117,58 +118,6 @@ CRetrieveByOffsets(CSegmentInterface c_segment, return status; } -const char* -get_float16_schema_config() { - static std::string conf = R"(name: "float16-collection" - fields: < - fieldID: 100 - name: "fakevec" - data_type: Float16Vector - type_params: < - key: "dim" - value: "16" - > - index_params: < - key: "metric_type" - value: "L2" - > - > - fields: < - fieldID: 101 - name: "age" - data_type: Int64 - is_primary_key: true - >)"; - static std::string fake_conf = ""; - return conf.c_str(); -} - -const char* -get_bfloat16_schema_config() { - static std::string conf = R"(name: "bfloat16-collection" - fields: < - fieldID: 100 - name: "fakevec" - data_type: BFloat16Vector - type_params: < - key: "dim" - value: "16" - > - index_params: < - key: "metric_type" - value: "L2" - > - > - fields: < - fieldID: 101 - name: "age" - data_type: Int64 - is_primary_key: true - >)"; - static std::string fake_conf = ""; - return conf.c_str(); -} - const char* get_default_index_meta() { static std::string conf = R"(maxIndexRowCount: 1000 @@ -221,75 +170,19 @@ generate_data(int N) { return std::make_tuple(raw_data, timestamps, uids); } +template std::string -generate_query_data_float16(int nq) { - namespace ser = milvus::proto::common; - std::default_random_engine e(67); - int dim = DIM; - std::normal_distribution dis(0.0, 1.0); - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::Float16Vector); - for (int i = 0; i < nq; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(float16(dis(e))); - } - value->add_values(vec.data(), vec.size() * sizeof(float16)); - } - auto blob = raw_group.SerializeAsString(); - return blob; -} - -std::string -generate_query_data_bfloat16(int nq) { - namespace ser = milvus::proto::common; - std::default_random_engine e(67); - int dim = DIM; - std::normal_distribution dis(0.0, 1.0); - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::BFloat16Vector); - for (int i = 0; i < nq; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(bfloat16(dis(e))); - } - value->add_values(vec.data(), vec.size() * sizeof(bfloat16)); - } - auto blob = raw_group.SerializeAsString(); - return blob; -} -// create Enum for schema::DataType::BinaryVector,schema::DataType::FloatVector -enum VectorType { - BinaryVector = 0, - FloatVector = 1, - Float16Vector = 2, - BFloat16Vector = 3, -}; - -std::string -generate_collection_schema(std::string metric_type, - int dim, - VectorType vector_type) { +generate_collection_schema(std::string metric_type, int dim) { namespace schema = milvus::proto::schema; + GET_SCHEMA_DATA_TYPE_FOR_VECTOR_TRAIT; + schema::CollectionSchema collection_schema; collection_schema.set_name("collection_test"); auto vec_field_schema = collection_schema.add_fields(); vec_field_schema->set_name("fakevec"); vec_field_schema->set_fieldid(100); - if (vector_type == VectorType::BinaryVector) { - vec_field_schema->set_data_type(schema::DataType::BinaryVector); - } else if (vector_type == VectorType::Float16Vector) { - vec_field_schema->set_data_type(schema::DataType::Float16Vector); - } else if (vector_type == VectorType::BFloat16Vector) { - vec_field_schema->set_data_type(schema::DataType::BFloat16Vector); - } else { - vec_field_schema->set_data_type(schema::DataType::FloatVector); - } + vec_field_schema->set_data_type(schema_data_type); auto metric_type_param = vec_field_schema->add_index_params(); metric_type_param->set_key("metric_type"); metric_type_param->set_value(metric_type); @@ -315,23 +208,6 @@ generate_collection_schema(std::string metric_type, return schema_string; } -// VecIndexPtr -// generate_index( -// void* raw_data, knowhere::Config conf, int64_t dim, int64_t topK, int64_t N, knowhere::IndexType index_type) { -// auto indexing = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type); -// -// auto database = knowhere::GenDataset(N, dim, raw_data); -// indexing->Train(database, conf); -// indexing->AddWithoutIds(database, conf); -// EXPECT_EQ(indexing->Count(), N); -// EXPECT_EQ(indexing->Dim(), dim); -// -// EXPECT_EQ(indexing->Count(), N); -// EXPECT_EQ(indexing->Dim(), dim); -// return indexing; -//} -//} // namespace - IndexBasePtr generate_index(void* raw_data, DataType field_type, @@ -404,71 +280,16 @@ TEST(CApiTest, SegmentTest) { free((char*)status.error_msg); } -TEST(CApiTest, CPlan) { - std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM, VectorType::BinaryVector); - auto collection = NewCollection(schema_string.c_str()); - - // const char* dsl_string = R"( - // { - // "bool": { - // "vector": { - // "fakevec": { - // "metric_type": "L2", - // "params": { - // "nprobe": 10 - // }, - // "query": "$0", - // "topk": 10, - // "round_decimal": 3 - // } - // } - // } - // })"; - - milvus::proto::plan::PlanNode plan_node; - auto vector_anns = plan_node.mutable_vector_anns(); - vector_anns->set_vector_type(milvus::proto::plan::VectorType::BinaryVector); - vector_anns->set_placeholder_tag("$0"); - vector_anns->set_field_id(100); - auto query_info = vector_anns->mutable_query_info(); - query_info->set_topk(10); - query_info->set_round_decimal(3); - query_info->set_metric_type("L2"); - query_info->set_search_params(R"({"nprobe": 10})"); - auto plan_str = plan_node.SerializeAsString(); - - void* plan = nullptr; - auto status = CreateSearchPlanByExpr( - collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - int64_t field_id = -1; - status = GetFieldID(plan, &field_id); - ASSERT_EQ(status.error_code, Success); - - auto col = static_cast(collection); - for (auto& [target_field_id, field_meta] : - col->get_schema()->get_fields()) { - if (field_meta.is_vector()) { - ASSERT_EQ(field_id, target_field_id.get()); - } - } - ASSERT_NE(field_id, -1); - - DeleteSearchPlan(plan); - DeleteCollection(collection); -} - -TEST(CApiTest, CApiCPlan_float16) { - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, 16, VectorType::Float16Vector); +template +void +Test_CPlan(const knowhere::MetricType& metric_type) { + std::string schema_string = + generate_collection_schema(knowhere::metric::JACCARD, DIM); auto collection = NewCollection(schema_string.c_str()); milvus::proto::plan::PlanNode plan_node; auto vector_anns = plan_node.mutable_vector_anns(); - vector_anns->set_vector_type( - milvus::proto::plan::VectorType::Float16Vector); + vector_anns->set_vector_type(TraitType::vector_type); vector_anns->set_placeholder_tag("$0"); vector_anns->set_field_id(100); auto query_info = vector_anns->mutable_query_info(); @@ -500,44 +321,11 @@ TEST(CApiTest, CApiCPlan_float16) { DeleteCollection(collection); } -TEST(CApiTest, CApiCPlan_bfloat16) { - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, 16, VectorType::BFloat16Vector); - auto collection = NewCollection(schema_string.c_str()); - - milvus::proto::plan::PlanNode plan_node; - auto vector_anns = plan_node.mutable_vector_anns(); - vector_anns->set_vector_type( - milvus::proto::plan::VectorType::BFloat16Vector); - vector_anns->set_placeholder_tag("$0"); - vector_anns->set_field_id(100); - auto query_info = vector_anns->mutable_query_info(); - query_info->set_topk(10); - query_info->set_round_decimal(3); - query_info->set_metric_type("L2"); - query_info->set_search_params(R"({"nprobe": 10})"); - auto plan_str = plan_node.SerializeAsString(); - - void* plan = nullptr; - auto status = CreateSearchPlanByExpr( - collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - int64_t field_id = -1; - status = GetFieldID(plan, &field_id); - ASSERT_EQ(status.error_code, Success); - - auto col = static_cast(collection); - for (auto& [target_field_id, field_meta] : - col->get_schema()->get_fields()) { - if (field_meta.is_vector()) { - ASSERT_EQ(field_id, target_field_id.get()); - } - } - ASSERT_NE(field_id, -1); - - DeleteSearchPlan(plan); - DeleteCollection(collection); +TEST(CApiTest, CPlan) { + Test_CPlan(knowhere::metric::JACCARD); + Test_CPlan(knowhere::metric::L2); + Test_CPlan(knowhere::metric::L2); + Test_CPlan(knowhere::metric::L2); } TEST(CApiTest, InsertTest) { @@ -1064,7 +852,7 @@ TEST(CApiTest, SearcTestWhenNullable) { auto plan_str = plan_node.SerializeAsString(); int num_queries = 10; - auto blob = generate_query_data(num_queries); + auto blob = generate_query_data(num_queries); void* plan = nullptr; status = CreateSearchPlanByExpr( @@ -1310,7 +1098,7 @@ TEST(CApiTest, SearchTest) { auto plan_str = plan_node.SerializeAsString(); int num_queries = 10; - auto blob = generate_query_data(num_queries); + auto blob = generate_query_data(num_queries); void* plan = nullptr; status = CreateSearchPlanByExpr( @@ -1377,7 +1165,7 @@ TEST(CApiTest, SearchTestWithExpr) { >)"; int num_queries = 10; - auto blob = generate_query_data(num_queries); + auto blob = generate_query_data(num_queries); void* plan = nullptr; auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan); @@ -1736,7 +1524,7 @@ TEST(CApiTest, ReduceRemoveDuplicates) { int num_queries = 10; int topK = 10; - auto blob = generate_query_data(num_queries); + auto blob = generate_query_data(num_queries); void* plan = nullptr; status = CreateSearchPlanByExpr( @@ -1829,7 +1617,7 @@ TEST(CApiTest, ReduceRemoveDuplicates) { DeleteSegment(segment); } -template +template void testReduceSearchWithExpr(int N, int topK, @@ -1837,19 +1625,8 @@ testReduceSearchWithExpr(int N, bool filter_all = false) { std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", " << num_queries << ")" << std::endl; - std::function schema_fun; - std::function query_gen_fun; - if constexpr (std::is_same_v) { - schema_fun = get_default_schema_config; - query_gen_fun = generate_query_data; - } else if constexpr (std::is_same_v) { - schema_fun = get_float16_schema_config; - query_gen_fun = generate_query_data_float16; - } else if constexpr (std::is_same_v) { - schema_fun = get_bfloat16_schema_config; - query_gen_fun = generate_query_data_bfloat16; - } - auto collection = NewCollection(schema_fun()); + + auto collection = NewCollection(get_default_schema_config()); CSegmentInterface segment; auto status = NewSegment(collection, Growing, -1, &segment, false); ASSERT_EQ(status.error_code, Success); @@ -1907,7 +1684,7 @@ testReduceSearchWithExpr(int N, topK % N; } auto serialized_expr_plan = fmt.str(); - auto blob = query_gen_fun(num_queries); + auto blob = generate_query_data(num_queries); void* plan = nullptr; auto binary_plan = @@ -1997,29 +1774,29 @@ testReduceSearchWithExpr(int N, } TEST(CApiTest, ReduceSearchWithExpr) { - //float32 - testReduceSearchWithExpr(2, 1, 1); - testReduceSearchWithExpr(2, 10, 10); - testReduceSearchWithExpr(100, 1, 1); - testReduceSearchWithExpr(100, 10, 10); - testReduceSearchWithExpr(10000, 1, 1); - testReduceSearchWithExpr(10000, 10, 10); - //float16 - testReduceSearchWithExpr(2, 10, 10, false); - testReduceSearchWithExpr(100, 10, 10, false); - //bfloat16 - testReduceSearchWithExpr(2, 10, 10, false); - testReduceSearchWithExpr(100, 10, 10, false); + // float32 + testReduceSearchWithExpr(2, 1, 1); + testReduceSearchWithExpr(2, 10, 10); + testReduceSearchWithExpr(100, 1, 1); + testReduceSearchWithExpr(100, 10, 10); + testReduceSearchWithExpr(10000, 1, 1); + testReduceSearchWithExpr(10000, 10, 10); + // float16 + testReduceSearchWithExpr(2, 10, 10, false); + testReduceSearchWithExpr(100, 10, 10, false); + // bfloat16 + testReduceSearchWithExpr(2, 10, 10, false); + testReduceSearchWithExpr(100, 10, 10, false); } TEST(CApiTest, ReduceSearchWithExprFilterAll) { - //float32 - testReduceSearchWithExpr(2, 1, 1, true); - testReduceSearchWithExpr(2, 10, 10, true); - //float16 - testReduceSearchWithExpr(2, 1, 1, true); - //bfloat16 - testReduceSearchWithExpr(2, 1, 1, true); + // float32 + testReduceSearchWithExpr(2, 1, 1, true); + testReduceSearchWithExpr(2, 10, 10, true); + // float16 + testReduceSearchWithExpr(2, 1, 1, true); + // bfloat16 + testReduceSearchWithExpr(2, 1, 1, true); } TEST(CApiTest, LoadIndexInfo) { @@ -2118,12 +1895,16 @@ TEST(CApiTest, LoadIndexSearch) { auto result = indexing.Search(query_dataset, conf, nullptr); } -TEST(CApiTest, Indexing_Without_Predicate) { +template +void +Test_Indexing_Without_Predicate() { + GET_ELEM_TYPE_FOR_VECTOR_TRAIT + // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = + generate_collection_schema(knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -2132,7 +1913,7 @@ TEST(CApiTest, Indexing_Without_Predicate) { auto N = ROW_COUNT; auto dataset = DataGen(schema, N); - auto vec_col = dataset.get_col(FieldId(100)); + auto vec_col = dataset.get_col(FieldId(100)); auto query_ptr = vec_col.data() + BIAS * DIM; int64_t offset; @@ -2150,7 +1931,7 @@ TEST(CApiTest, Indexing_Without_Predicate) { milvus::proto::plan::PlanNode plan_node; auto vector_anns = plan_node.mutable_vector_anns(); - vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector); + vector_anns->set_vector_type(TraitType::vector_type); vector_anns->set_placeholder_tag("$0"); vector_anns->set_field_id(100); auto query_info = vector_anns->mutable_query_info(); @@ -2163,7 +1944,7 @@ TEST(CApiTest, Indexing_Without_Predicate) { // create place_holder_group int num_queries = 5; auto raw_group = - CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr); + CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -2192,7 +1973,7 @@ TEST(CApiTest, Indexing_Without_Predicate) { // load index to segment auto indexing = generate_index(vec_col.data(), - DataType::VECTOR_FLOAT, + TraitType::data_type, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFSQ8, DIM, @@ -2232,7 +2013,7 @@ TEST(CApiTest, Indexing_Without_Predicate) { AppendIndexParam( c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); AppendFieldInfo( - c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, ""); + c_load_index_info, 0, 0, 0, 100, TraitType::c_data_type, false, ""); AppendIndexEngineVersionToLoadInfo( c_load_index_info, knowhere::Version::GetCurrentVersion().VersionNumber()); @@ -2267,12 +2048,18 @@ TEST(CApiTest, Indexing_Without_Predicate) { DeleteSegment(segment); } +TEST(CApiTest, Indexing_Without_Predicate) { + Test_Indexing_Without_Predicate(); + Test_Indexing_Without_Predicate(); + Test_Indexing_Without_Predicate(); +} + TEST(CApiTest, Indexing_Expr_Without_Predicate) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -2421,8 +2208,8 @@ TEST(CApiTest, Indexing_With_float_Predicate_Range) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -2599,8 +2386,8 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -2779,8 +2566,8 @@ TEST(CApiTest, Indexing_With_float_Predicate_Term) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -2951,8 +2738,8 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -3124,8 +2911,9 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM, VectorType::BinaryVector); + std::string schema_string = + generate_collection_schema( + knowhere::metric::JACCARD, DIM); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3194,8 +2982,8 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) { // create place_holder_group int num_queries = 5; - auto raw_group = - CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr); + auto raw_group = CreatePlaceholderGroupFromBlob( + num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3304,8 +3092,9 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM, VectorType::BinaryVector); + std::string schema_string = + generate_collection_schema( + knowhere::metric::JACCARD, DIM); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3373,8 +3162,8 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { // create place_holder_group int num_queries = 5; - auto raw_group = - CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr); + auto raw_group = CreatePlaceholderGroupFromBlob( + num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3484,8 +3273,9 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM, VectorType::BinaryVector); + std::string schema_string = + generate_collection_schema( + knowhere::metric::JACCARD, DIM); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3549,8 +3339,8 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) { // create place_holder_group int num_queries = 5; int topK = 5; - auto raw_group = - CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr); + auto raw_group = CreatePlaceholderGroupFromBlob( + num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3681,8 +3471,9 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) { // insert data to segment constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM, VectorType::BinaryVector); + std::string schema_string = + generate_collection_schema( + knowhere::metric::JACCARD, DIM); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3745,8 +3536,8 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) { // create place_holder_group int num_queries = 5; int topK = 5; - auto raw_group = - CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr); + auto raw_group = CreatePlaceholderGroupFromBlob( + num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3895,8 +3686,8 @@ TEST(CApiTest, SealedSegmentTest) { TEST(CApiTest, SealedSegment_search_float_Predicate_Range) { constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -4048,8 +3839,8 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) { TEST(CApiTest, SealedSegment_search_without_predicates) { constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -4092,7 +3883,7 @@ TEST(CApiTest, SealedSegment_search_without_predicates) { ASSERT_EQ(status.error_code, Success); int num_queries = 10; - auto blob = generate_query_data(num_queries); + auto blob = generate_query_data(num_queries); void* plan = nullptr; status = CreateSearchPlanByExpr( @@ -4128,8 +3919,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) { TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) { constexpr auto TOPK = 5; - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::FloatVector); + std::string schema_string = generate_collection_schema( + knowhere::metric::L2, DIM); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); CSegmentInterface segment; @@ -4510,8 +4301,10 @@ TEST(CApiTest, RetriveScalarFieldFromSealedSegmentWithIndex) { DeleteSegment(segment); } -TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_IP) { - auto c_collection = NewCollection(get_default_schema_config()); +template +void +Test_Range_Search_With_Radius_And_Range_Filter() { + auto c_collection = NewCollection(get_default_schema_config()); CSegmentInterface segment; auto status = NewSegment(c_collection, Growing, -1, &segment, false); ASSERT_EQ(status.error_code, Success); @@ -4539,15 +4332,15 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_IP) { query_info: < topk: 10 round_decimal: 3 - metric_type: "IP" - search_params: "{\"nprobe\": 10,\"radius\": 10}" + metric_type: "L2" + search_params: "{\"nprobe\": 10,\"radius\": 20, \"range_filter\": 10}" > placeholder_tag: "$0" >)"; auto plan_str = translate_text_plan_to_binary_plan(raw_plan); int num_queries = 10; - auto blob = generate_query_data(num_queries); + auto blob = generate_query_data(num_queries); void* plan = nullptr; status = CreateSearchPlanByExpr( @@ -4574,232 +4367,45 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_IP) { DeleteSegment(segment); } -TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP) { - auto c_collection = - NewCollection(get_default_schema_config(), knowhere::metric::IP); - CSegmentInterface segment; - auto status = NewSegment(c_collection, Growing, -1, &segment, false); - ASSERT_EQ(status.error_code, Success); - auto col = (milvus::segcore::Collection*)c_collection; - - int N = 10000; - auto dataset = DataGen(col->get_schema(), N); - int64_t ts_offset = 1000; - - int64_t offset; - PreInsert(segment, N, &offset); - - auto insert_data = serialize(dataset.raw_); - auto ins_res = Insert(segment, - offset, - N, - dataset.row_ids_.data(), - dataset.timestamps_.data(), - insert_data.data(), - insert_data.size()); - ASSERT_EQ(ins_res.error_code, Success); - - const char* raw_plan = R"(vector_anns: < - field_id: 100 - query_info: < - topk: 10 - round_decimal: 3 - metric_type: "IP" - search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}" - > - placeholder_tag: "$0" - >)"; - auto plan_str = translate_text_plan_to_binary_plan(raw_plan); - - int num_queries = 10; - auto blob = generate_query_data(num_queries); - - void* plan = nullptr; - status = CreateSearchPlanByExpr( - c_collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup( - plan, blob.data(), blob.length(), &placeholderGroup); - ASSERT_EQ(status.error_code, Success); - - std::vector placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - - CSearchResult search_result; - auto res = - CSearch(segment, plan, placeholderGroup, ts_offset, &search_result); - ASSERT_EQ(res.error_code, Success); - - DeleteSearchPlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteSearchResult(search_result); - DeleteCollection(c_collection); - DeleteSegment(segment); +TEST(CApiTest, Range_Search_With_Radius_And_Range_Filter) { + Test_Range_Search_With_Radius_And_Range_Filter(); + Test_Range_Search_With_Radius_And_Range_Filter(); + Test_Range_Search_With_Radius_And_Range_Filter(); } -TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_L2) { - auto c_collection = NewCollection(get_default_schema_config()); - CSegmentInterface segment; - auto status = NewSegment(c_collection, Growing, -1, &segment, false); - ASSERT_EQ(status.error_code, Success); - auto col = (milvus::segcore::Collection*)c_collection; - - int N = 10000; - auto dataset = DataGen(col->get_schema(), N); - int64_t ts_offset = 1000; +std::vector +search_id(const BitsetType& bitset, + Timestamp* timestamps, + Timestamp timestamp, + bool use_find) { + std::vector dst_offset; + if (use_find) { + auto i = bitset.find_first(); + while (i.has_value()) { + auto offset = SegOffset(i.value()); + if (timestamps[offset.get()] <= timestamp) { + dst_offset.push_back(offset); + } - int64_t offset; - PreInsert(segment, N, &offset); + i = bitset.find_next(i.value()); + } - auto insert_data = serialize(dataset.raw_); - auto ins_res = Insert(segment, - offset, - N, - dataset.row_ids_.data(), - dataset.timestamps_.data(), - insert_data.data(), - insert_data.size()); - ASSERT_EQ(ins_res.error_code, Success); + return dst_offset; + } else { + for (int i = 0; i < bitset.size(); i++) { + if (bitset[i]) { + auto offset = SegOffset(i); + if (timestamps[offset.get()] <= timestamp) { + dst_offset.push_back(offset); + } + } + } + } + return dst_offset; +} - const char* raw_plan = R"(vector_anns: < - field_id: 100 - query_info: < - topk: 10 - round_decimal: 3 - metric_type: "L2" - search_params: "{\"nprobe\": 10,\"radius\": 10}" - > - placeholder_tag: "$0" - >)"; - auto plan_str = translate_text_plan_to_binary_plan(raw_plan); - - int num_queries = 10; - auto blob = generate_query_data(num_queries); - - void* plan = nullptr; - status = CreateSearchPlanByExpr( - c_collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup( - plan, blob.data(), blob.length(), &placeholderGroup); - ASSERT_EQ(status.error_code, Success); - - std::vector placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - - CSearchResult search_result; - auto res = - CSearch(segment, plan, placeholderGroup, ts_offset, &search_result); - ASSERT_EQ(res.error_code, Success); - - DeleteSearchPlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteSearchResult(search_result); - DeleteCollection(c_collection); - DeleteSegment(segment); -} - -TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_L2) { - auto c_collection = NewCollection(get_default_schema_config()); - CSegmentInterface segment; - auto status = NewSegment(c_collection, Growing, -1, &segment, false); - ASSERT_EQ(status.error_code, Success); - auto col = (milvus::segcore::Collection*)c_collection; - - int N = 10000; - auto dataset = DataGen(col->get_schema(), N); - int64_t ts_offset = 1000; - - int64_t offset; - PreInsert(segment, N, &offset); - - auto insert_data = serialize(dataset.raw_); - auto ins_res = Insert(segment, - offset, - N, - dataset.row_ids_.data(), - dataset.timestamps_.data(), - insert_data.data(), - insert_data.size()); - ASSERT_EQ(ins_res.error_code, Success); - - const char* raw_plan = R"(vector_anns: < - field_id: 100 - query_info: < - topk: 10 - round_decimal: 3 - metric_type: "L2" - search_params: "{\"nprobe\": 10,\"radius\": 20, \"range_filter\": 10}" - > - placeholder_tag: "$0" - >)"; - auto plan_str = translate_text_plan_to_binary_plan(raw_plan); - - int num_queries = 10; - auto blob = generate_query_data(num_queries); - - void* plan = nullptr; - status = CreateSearchPlanByExpr( - c_collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup( - plan, blob.data(), blob.length(), &placeholderGroup); - ASSERT_EQ(status.error_code, Success); - - std::vector placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - - CSearchResult search_result; - auto res = - CSearch(segment, plan, placeholderGroup, ts_offset, &search_result); - ASSERT_EQ(res.error_code, Success); - - DeleteSearchPlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteSearchResult(search_result); - DeleteCollection(c_collection); - DeleteSegment(segment); -} - -std::vector -search_id(const BitsetType& bitset, - Timestamp* timestamps, - Timestamp timestamp, - bool use_find) { - std::vector dst_offset; - if (use_find) { - auto i = bitset.find_first(); - while (i.has_value()) { - auto offset = SegOffset(i.value()); - if (timestamps[offset.get()] <= timestamp) { - dst_offset.push_back(offset); - } - - i = bitset.find_next(i.value()); - } - - return dst_offset; - } else { - for (int i = 0; i < bitset.size(); i++) { - if (bitset[i]) { - auto offset = SegOffset(i); - if (timestamps[offset.get()] <= timestamp) { - dst_offset.push_back(offset); - } - } - } - } - return dst_offset; -} - -TEST(CApiTest, SearchIdTest) { - // using BitsetType = boost::dynamic_bitset<>; +TEST(CApiTest, SearchIdTest) { + // using BitsetType = boost::dynamic_bitset<>; auto test = [&](int NT) { BitsetType bitset(1000000); @@ -4848,436 +4454,6 @@ TEST(CApiTest, SearchIdTest) { } } -TEST(CApiTest, Indexing_Without_Predicate_float16) { - // insert data to segment - constexpr auto TOPK = 5; - - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::Float16Vector); - auto collection = NewCollection(schema_string.c_str()); - auto schema = ((segcore::Collection*)collection)->get_schema(); - CSegmentInterface segment; - auto status = NewSegment(collection, Growing, -1, &segment, false); - ASSERT_EQ(status.error_code, Success); - - auto N = ROW_COUNT; - auto dataset = DataGen(schema, N); - auto vec_col = dataset.get_col(FieldId(100)); - auto query_ptr = vec_col.data() + BIAS * DIM; - - int64_t offset; - PreInsert(segment, N, &offset); - - auto insert_data = serialize(dataset.raw_); - auto ins_res = Insert(segment, - offset, - N, - dataset.row_ids_.data(), - dataset.timestamps_.data(), - insert_data.data(), - insert_data.size()); - ASSERT_EQ(ins_res.error_code, Success); - - milvus::proto::plan::PlanNode plan_node; - auto vector_anns = plan_node.mutable_vector_anns(); - vector_anns->set_vector_type( - milvus::proto::plan::VectorType::Float16Vector); - vector_anns->set_placeholder_tag("$0"); - vector_anns->set_field_id(100); - auto query_info = vector_anns->mutable_query_info(); - query_info->set_topk(5); - query_info->set_round_decimal(-1); - query_info->set_metric_type("L2"); - query_info->set_search_params(R"({"nprobe": 10})"); - auto plan_str = plan_node.SerializeAsString(); - - // create place_holder_group - int num_queries = 5; - auto raw_group = - CreateFloat16PlaceholderGroupFromBlob(num_queries, DIM, query_ptr); - auto blob = raw_group.SerializeAsString(); - - // search on segment's small index - void* plan = nullptr; - status = CreateSearchPlanByExpr( - collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup( - plan, blob.data(), blob.length(), &placeholderGroup); - ASSERT_EQ(status.error_code, Success); - - std::vector placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - - Timestamp timestmap = 10000000; - - CSearchResult c_search_result_on_smallIndex; - auto res_before_load_index = CSearch(segment, - plan, - placeholderGroup, - timestmap, - &c_search_result_on_smallIndex); - ASSERT_EQ(res_before_load_index.error_code, Success); - - // load index to segment - auto indexing = generate_index(vec_col.data(), - DataType::VECTOR_FLOAT16, - knowhere::metric::L2, - IndexEnum::INDEX_FAISS_IDMAP, - DIM, - N); - - // gen query dataset - auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr); - auto vec_index = dynamic_cast(indexing.get()); - auto search_plan = reinterpret_cast(plan); - SearchInfo search_info = search_plan->plan_node_->search_info_; - SearchResult result_on_index; - vec_index->Query(query_dataset, search_info, nullptr, result_on_index); - auto ids = result_on_index.seg_offsets_.data(); - auto dis = result_on_index.distances_.data(); - std::vector vec_ids(ids, ids + TOPK * num_queries); - std::vector vec_dis; - for (int j = 0; j < TOPK * num_queries; ++j) { - vec_dis.push_back(dis[j] * -1); - } - - auto search_result_on_raw_index = - (SearchResult*)c_search_result_on_smallIndex; - search_result_on_raw_index->seg_offsets_ = vec_ids; - search_result_on_raw_index->distances_ = vec_dis; - - auto binary_set = indexing->Serialize(milvus::Config{}); - void* c_load_index_info = nullptr; - status = NewLoadIndexInfo(&c_load_index_info); - ASSERT_EQ(status.error_code, Success); - std::string index_type_key = "index_type"; - std::string index_type_value = IndexEnum::INDEX_FAISS_IDMAP; - std::string metric_type_key = "metric_type"; - std::string metric_type_value = knowhere::metric::L2; - - AppendIndexParam( - c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); - AppendIndexParam( - c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); - AppendFieldInfo( - c_load_index_info, 0, 0, 0, 100, CDataType::Float16Vector, false, ""); - AppendIndexEngineVersionToLoadInfo( - c_load_index_info, - knowhere::Version::GetCurrentVersion().VersionNumber()); - AppendIndex(c_load_index_info, (CBinarySet)&binary_set); - - // load index for vec field, load raw data for scalar field - auto sealed_segment = SealedCreator(schema, dataset); - sealed_segment->DropFieldData(FieldId(100)); - sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info); - CSearchResult c_search_result_on_bigIndex; - auto res_after_load_index = CSearch(sealed_segment.get(), - plan, - placeholderGroup, - timestmap, - &c_search_result_on_bigIndex); - ASSERT_EQ(res_after_load_index.error_code, Success); - - auto search_result_on_raw_index_json = - SearchResultToJson(*search_result_on_raw_index); - auto search_result_on_bigIndex_json = - SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex)); - - ASSERT_EQ(search_result_on_raw_index_json.dump(1), - search_result_on_bigIndex_json.dump(1)); - - DeleteLoadIndexInfo(c_load_index_info); - DeleteSearchPlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteSearchResult(c_search_result_on_smallIndex); - DeleteSearchResult(c_search_result_on_bigIndex); - DeleteCollection(collection); - DeleteSegment(segment); -} - -TEST(CApiTest, Indexing_Without_Predicate_bfloat16) { - // insert data to segment - constexpr auto TOPK = 5; - - std::string schema_string = generate_collection_schema( - knowhere::metric::L2, DIM, VectorType::BFloat16Vector); - auto collection = NewCollection(schema_string.c_str()); - auto schema = ((segcore::Collection*)collection)->get_schema(); - CSegmentInterface segment; - auto status = NewSegment(collection, Growing, -1, &segment, false); - ASSERT_EQ(status.error_code, Success); - - auto N = ROW_COUNT; - auto dataset = DataGen(schema, N); - auto vec_col = dataset.get_col(FieldId(100)); - auto query_ptr = vec_col.data() + BIAS * DIM; - - int64_t offset; - PreInsert(segment, N, &offset); - - auto insert_data = serialize(dataset.raw_); - auto ins_res = Insert(segment, - offset, - N, - dataset.row_ids_.data(), - dataset.timestamps_.data(), - insert_data.data(), - insert_data.size()); - ASSERT_EQ(ins_res.error_code, Success); - - milvus::proto::plan::PlanNode plan_node; - auto vector_anns = plan_node.mutable_vector_anns(); - vector_anns->set_vector_type( - milvus::proto::plan::VectorType::BFloat16Vector); - vector_anns->set_placeholder_tag("$0"); - vector_anns->set_field_id(100); - auto query_info = vector_anns->mutable_query_info(); - query_info->set_topk(5); - query_info->set_round_decimal(-1); - query_info->set_metric_type("L2"); - query_info->set_search_params(R"({"nprobe": 10})"); - auto plan_str = plan_node.SerializeAsString(); - - // create place_holder_group - int num_queries = 5; - auto raw_group = - CreateBFloat16PlaceholderGroupFromBlob(num_queries, DIM, query_ptr); - auto blob = raw_group.SerializeAsString(); - - // search on segment's small index - void* plan = nullptr; - status = CreateSearchPlanByExpr( - collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup( - plan, blob.data(), blob.length(), &placeholderGroup); - ASSERT_EQ(status.error_code, Success); - - std::vector placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - - Timestamp timestmap = 10000000; - - CSearchResult c_search_result_on_smallIndex; - auto res_before_load_index = CSearch(segment, - plan, - placeholderGroup, - timestmap, - &c_search_result_on_smallIndex); - ASSERT_EQ(res_before_load_index.error_code, Success); - - // load index to segment - auto indexing = generate_index(vec_col.data(), - DataType::VECTOR_BFLOAT16, - knowhere::metric::L2, - IndexEnum::INDEX_FAISS_IDMAP, - DIM, - N); - - // gen query dataset - auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr); - auto vec_index = dynamic_cast(indexing.get()); - auto search_plan = reinterpret_cast(plan); - SearchInfo search_info = search_plan->plan_node_->search_info_; - SearchResult result_on_index; - vec_index->Query(query_dataset, search_info, nullptr, result_on_index); - auto ids = result_on_index.seg_offsets_.data(); - auto dis = result_on_index.distances_.data(); - std::vector vec_ids(ids, ids + TOPK * num_queries); - std::vector vec_dis; - for (int j = 0; j < TOPK * num_queries; ++j) { - vec_dis.push_back(dis[j] * -1); - } - - auto search_result_on_raw_index = - (SearchResult*)c_search_result_on_smallIndex; - search_result_on_raw_index->seg_offsets_ = vec_ids; - search_result_on_raw_index->distances_ = vec_dis; - - auto binary_set = indexing->Serialize(milvus::Config{}); - void* c_load_index_info = nullptr; - status = NewLoadIndexInfo(&c_load_index_info); - ASSERT_EQ(status.error_code, Success); - std::string index_type_key = "index_type"; - std::string index_type_value = IndexEnum::INDEX_FAISS_IDMAP; - std::string metric_type_key = "metric_type"; - std::string metric_type_value = knowhere::metric::L2; - - AppendIndexParam( - c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); - AppendIndexParam( - c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); - AppendFieldInfo( - c_load_index_info, 0, 0, 0, 100, CDataType::BFloat16Vector, false, ""); - AppendIndexEngineVersionToLoadInfo( - c_load_index_info, - knowhere::Version::GetCurrentVersion().VersionNumber()); - AppendIndex(c_load_index_info, (CBinarySet)&binary_set); - - // load index for vec field, load raw data for scalar field - auto sealed_segment = SealedCreator(schema, dataset); - sealed_segment->DropFieldData(FieldId(100)); - sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info); - CSearchResult c_search_result_on_bigIndex; - auto res_after_load_index = CSearch(sealed_segment.get(), - plan, - placeholderGroup, - timestmap, - &c_search_result_on_bigIndex); - ASSERT_EQ(res_after_load_index.error_code, Success); - - auto search_result_on_raw_index_json = - SearchResultToJson(*search_result_on_raw_index); - auto search_result_on_bigIndex_json = - SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex)); - - ASSERT_EQ(search_result_on_raw_index_json.dump(1), - search_result_on_bigIndex_json.dump(1)); - - DeleteLoadIndexInfo(c_load_index_info); - DeleteSearchPlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteSearchResult(c_search_result_on_smallIndex); - DeleteSearchResult(c_search_result_on_bigIndex); - DeleteCollection(collection); - DeleteSegment(segment); -} - -TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_FLOAT16) { - auto c_collection = - NewCollection(get_float16_schema_config(), knowhere::metric::IP); - CSegmentInterface segment; - auto status = NewSegment(c_collection, Growing, -1, &segment, false); - ASSERT_EQ(status.error_code, Success); - auto col = (milvus::segcore::Collection*)c_collection; - - int N = 10000; - auto dataset = DataGen(col->get_schema(), N); - int64_t ts_offset = 1000; - - int64_t offset; - PreInsert(segment, N, &offset); - - auto insert_data = serialize(dataset.raw_); - auto ins_res = Insert(segment, - offset, - N, - dataset.row_ids_.data(), - dataset.timestamps_.data(), - insert_data.data(), - insert_data.size()); - ASSERT_EQ(ins_res.error_code, Success); - - const char* raw_plan = R"(vector_anns: < - field_id: 100 - query_info: < - topk: 10 - round_decimal: 3 - metric_type: "IP" - search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}" - > - placeholder_tag: "$0" - >)"; - auto plan_str = translate_text_plan_to_binary_plan(raw_plan); - - int num_queries = 10; - auto blob = generate_query_data_float16(num_queries); - - void* plan = nullptr; - status = CreateSearchPlanByExpr( - c_collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup( - plan, blob.data(), blob.length(), &placeholderGroup); - ASSERT_EQ(status.error_code, Success); - - std::vector placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - - CSearchResult search_result; - auto res = - CSearch(segment, plan, placeholderGroup, ts_offset, &search_result); - ASSERT_EQ(res.error_code, Success); - - DeleteSearchPlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteSearchResult(search_result); - DeleteCollection(c_collection); - DeleteSegment(segment); -} - -TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_BFLOAT16) { - auto c_collection = - NewCollection(get_bfloat16_schema_config(), knowhere::metric::IP); - CSegmentInterface segment; - auto status = NewSegment(c_collection, Growing, -1, &segment, false); - ASSERT_EQ(status.error_code, Success); - auto col = (milvus::segcore::Collection*)c_collection; - - int N = 10000; - auto dataset = DataGen(col->get_schema(), N); - int64_t ts_offset = 1000; - - int64_t offset; - PreInsert(segment, N, &offset); - - auto insert_data = serialize(dataset.raw_); - auto ins_res = Insert(segment, - offset, - N, - dataset.row_ids_.data(), - dataset.timestamps_.data(), - insert_data.data(), - insert_data.size()); - ASSERT_EQ(ins_res.error_code, Success); - - const char* raw_plan = R"(vector_anns: < - field_id: 100 - query_info: < - topk: 10 - round_decimal: 3 - metric_type: "IP" - search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}" - > - placeholder_tag: "$0" - >)"; - auto plan_str = translate_text_plan_to_binary_plan(raw_plan); - - int num_queries = 10; - auto blob = generate_query_data_bfloat16(num_queries); - - void* plan = nullptr; - status = CreateSearchPlanByExpr( - c_collection, plan_str.data(), plan_str.size(), &plan); - ASSERT_EQ(status.error_code, Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup( - plan, blob.data(), blob.length(), &placeholderGroup); - ASSERT_EQ(status.error_code, Success); - - std::vector placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - - CSearchResult search_result; - auto res = - CSearch(segment, plan, placeholderGroup, ts_offset, &search_result); - ASSERT_EQ(res.error_code, Success); - - DeleteSearchPlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteSearchResult(search_result); - DeleteCollection(c_collection); - DeleteSegment(segment); -} - TEST(CApiTest, IsLoadWithDisk) { ASSERT_TRUE(IsLoadWithDisk(INVERTED_INDEX_TYPE, 0)); } diff --git a/internal/core/unittest/test_float16.cpp b/internal/core/unittest/test_float16.cpp index 670855c5c330d..122adc5208b04 100644 --- a/internal/core/unittest/test_float16.cpp +++ b/internal/core/unittest/test_float16.cpp @@ -118,7 +118,8 @@ TEST(Float16, ExecWithoutPredicateFlat) { auto vec_ptr = dataset.get_col(vec_fid); auto num_queries = 5; - auto ph_group_raw = CreateFloat16PlaceholderGroup(num_queries, 32, 1024); + auto ph_group_raw = + CreatePlaceholderGroup(num_queries, 32, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp timestamp = 1000000; @@ -274,7 +275,8 @@ TEST(Float16, ExecWithPredicate) { auto plan = CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreateFloat16PlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = + CreatePlaceholderGroup(num_queries, 16, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -354,7 +356,8 @@ TEST(BFloat16, ExecWithoutPredicateFlat) { auto vec_ptr = dataset.get_col(vec_fid); auto num_queries = 5; - auto ph_group_raw = CreateBFloat16PlaceholderGroup(num_queries, 32, 1024); + auto ph_group_raw = + CreatePlaceholderGroup(num_queries, 32, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp timestamp = 1000000; @@ -510,7 +513,8 @@ TEST(BFloat16, ExecWithPredicate) { auto plan = CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreateBFloat16PlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = + CreatePlaceholderGroup(num_queries, 16, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp timestamp = 1000000; diff --git a/internal/core/unittest/test_query.cpp b/internal/core/unittest/test_query.cpp index a9e4c80eb0ea3..2b1f0d5313c35 100644 --- a/internal/core/unittest/test_query.cpp +++ b/internal/core/unittest/test_query.cpp @@ -725,7 +725,7 @@ TEST(Query, ExecWithPredicateBinary) { auto plan = CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreateBinaryPlaceholderGroupFromBlob( + auto ph_group_raw = CreatePlaceholderGroupFromBlob( num_queries, 512, vec_ptr.data() + 1024 * 512 / 8); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index c49ee0d64e640..7f904f6e44659 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -743,26 +743,6 @@ DataGenForJsonArray(SchemaPtr schema, return res; } -inline auto -CreatePlaceholderGroup(int64_t num_queries, int dim, int64_t seed = 42) { - namespace ser = milvus::proto::common; - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::FloatVector); - std::normal_distribution dis(0, 1); - std::default_random_engine e(seed); - for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(dis(e)); - } - // std::string line((char*)vec.data(), (char*)vec.data() + vec.size() * sizeof(float)); - value->add_values(vec.data(), vec.size() * sizeof(float)); - } - return raw_group; -} - inline auto CreatePlaceholderGroup(int64_t num_queries, int dim, @@ -782,148 +762,57 @@ CreatePlaceholderGroup(int64_t num_queries, return raw_group; } -inline auto -CreatePlaceholderGroupFromBlob(int64_t num_queries, int dim, const float* src) { - namespace ser = milvus::proto::common; - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::FloatVector); - int64_t src_index = 0; - - for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(src[src_index++]); - } - // std::string line((char*)vec.data(), (char*)vec.data() + vec.size() * sizeof(float)); - value->add_values(vec.data(), vec.size() * sizeof(float)); - } - return raw_group; -} - -inline auto -CreateBinaryPlaceholderGroup(int64_t num_queries, - int64_t dim, - int64_t seed = 42) { - assert(dim % 8 == 0); - namespace ser = milvus::proto::common; - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::BinaryVector); - std::default_random_engine e(seed); - for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim / 8; ++d) { - vec.push_back(e()); - } - // std::string line((char*)vec.data(), (char*)vec.data() + vec.size() * sizeof(float)); - value->add_values(vec.data(), vec.size()); +template +auto +CreatePlaceholderGroup(int64_t num_queries, int dim, int64_t seed = 42) { + if (std::is_same_v) { + assert(dim % 8 == 0); } - return raw_group; -} - -inline auto -CreateBinaryPlaceholderGroupFromBlob(int64_t num_queries, - int64_t dim, - const uint8_t* ptr) { - assert(dim % 8 == 0); namespace ser = milvus::proto::common; - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::BinaryVector); - for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim / 8; ++d) { - vec.push_back(*ptr); - ++ptr; - } - // std::string line((char*)vec.data(), (char*)vec.data() + vec.size() * sizeof(float)); - value->add_values(vec.data(), vec.size()); - } - return raw_group; -} + GET_ELEM_TYPE_FOR_VECTOR_TRAIT -inline auto -CreateFloat16PlaceholderGroup(int64_t num_queries, - int64_t dim, - int64_t seed = 42) { - namespace ser = milvus::proto::common; ser::PlaceholderGroup raw_group; auto value = raw_group.add_placeholders(); value->set_tag("$0"); - value->set_type(ser::PlaceholderType::Float16Vector); + value->set_type(TraitType::placeholder_type); + // TODO caiyd: need update for Int8Vector std::normal_distribution dis(0, 1); std::default_random_engine e(seed); for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(float16(dis(e))); + std::vector vec; + for (int d = 0; d < dim / TraitType::dim_factor; ++d) { + if (std::is_same_v) { + vec.push_back(e()); + } else { + vec.push_back(elem_type(dis(e))); + } } - value->add_values(vec.data(), vec.size() * sizeof(float16)); + value->add_values(vec.data(), vec.size() * sizeof(elem_type)); } return raw_group; } +template inline auto -CreateFloat16PlaceholderGroupFromBlob(int64_t num_queries, - int64_t dim, - const float16* ptr) { - namespace ser = milvus::proto::common; - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::Float16Vector); - for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(*ptr); - ++ptr; - } - value->add_values(vec.data(), vec.size() * sizeof(float16)); +CreatePlaceholderGroupFromBlob(int64_t num_queries, int dim, const void* src) { + if (std::is_same_v) { + assert(dim % 8 == 0); } - return raw_group; -} - -inline auto -CreateBFloat16PlaceholderGroup(int64_t num_queries, - int64_t dim, - int64_t seed = 42) { namespace ser = milvus::proto::common; - ser::PlaceholderGroup raw_group; - auto value = raw_group.add_placeholders(); - value->set_tag("$0"); - value->set_type(ser::PlaceholderType::BFloat16Vector); - std::normal_distribution dis(0, 1); - std::default_random_engine e(seed); - for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(bfloat16(dis(e))); - } - value->add_values(vec.data(), vec.size() * sizeof(bfloat16)); - } - return raw_group; -} + GET_ELEM_TYPE_FOR_VECTOR_TRAIT -inline auto -CreateBFloat16PlaceholderGroupFromBlob(int64_t num_queries, - int64_t dim, - const bfloat16* ptr) { - namespace ser = milvus::proto::common; ser::PlaceholderGroup raw_group; auto value = raw_group.add_placeholders(); value->set_tag("$0"); - value->set_type(ser::PlaceholderType::BFloat16Vector); + value->set_type(TraitType::placeholder_type); + int64_t src_index = 0; + for (int i = 0; i < num_queries; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(*ptr); - ++ptr; + std::vector vec; + for (int d = 0; d < dim / TraitType::dim_factor; ++d) { + vec.push_back(((elem_type*)src)[src_index++]); } - value->add_values(vec.data(), vec.size() * sizeof(bfloat16)); + value->add_values(vec.data(), vec.size() * sizeof(elem_type)); } return raw_group; } diff --git a/internal/core/unittest/test_utils/c_api_test_utils.h b/internal/core/unittest/test_utils/c_api_test_utils.h index b83d6e5640b92..86e07c727c6e8 100644 --- a/internal/core/unittest/test_utils/c_api_test_utils.h +++ b/internal/core/unittest/test_utils/c_api_test_utils.h @@ -23,6 +23,7 @@ #include "common/Types.h" #include "common/type_c.h" +#include "common/VectorTrait.h" #include "pb/plan.pb.h" #include "segcore/Collection.h" #include "segcore/reduce/Reduce.h" @@ -32,7 +33,6 @@ #include "futures/future_c.h" #include "DataGen.h" #include "PbHelper.h" -#include "c_api_test_utils.h" #include "indexbuilder_test_utils.h" using namespace milvus; @@ -66,26 +66,30 @@ generate_max_float_query_data(int all_nq, int max_float_nq) { return blob; } +template std::string generate_query_data(int nq) { namespace ser = milvus::proto::common; + GET_ELEM_TYPE_FOR_VECTOR_TRAIT + std::default_random_engine e(67); int dim = DIM; - std::normal_distribution dis(0.0, 1.0); + std::uniform_int_distribution dis(-128, 127); ser::PlaceholderGroup raw_group; auto value = raw_group.add_placeholders(); value->set_tag("$0"); - value->set_type(ser::PlaceholderType::FloatVector); + value->set_type(TraitType::placeholder_type); for (int i = 0; i < nq; ++i) { - std::vector vec; - for (int d = 0; d < dim; ++d) { - vec.push_back(dis(e)); + std::vector vec; + for (int d = 0; d < dim / TraitType::dim_factor; ++d) { + vec.push_back((elem_type)dis(e)); } - value->add_values(vec.data(), vec.size() * sizeof(float)); + value->add_values(vec.data(), vec.size() * sizeof(elem_type)); } auto blob = raw_group.SerializeAsString(); return blob; } + void CheckSearchResultDuplicate(const std::vector& results, int group_size = 1) { @@ -117,13 +121,14 @@ CheckSearchResultDuplicate(const std::vector& results, } } +template const char* get_default_schema_config() { - static std::string conf = R"(name: "default-collection" + auto fmt = boost::format(R"(name: "default-collection" fields: < fieldID: 100 name: "fakevec" - data_type: FloatVector + data_type: %1% type_params: < key: "dim" value: "16" @@ -138,9 +143,9 @@ get_default_schema_config() { name: "age" data_type: Int64 is_primary_key: true - >)"; - static std::string fake_conf = ""; - return conf.c_str(); + >)") % + (int(TraitType::data_type)); + return fmt.str().c_str(); } const char*