Skip to content

Commit

Permalink
Merge branch 'main' into typo_check
Browse files Browse the repository at this point in the history
  • Loading branch information
broccoliSpicy authored Oct 21, 2024
2 parents fd3807b + 038f239 commit 8228502
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 0 deletions.
5 changes: 5 additions & 0 deletions rust/lance-index/src/scalar/lance_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ impl IndexReader for v2::reader::FileReader {
range: std::ops::Range<usize>,
projection: Option<&[&str]>,
) -> Result<RecordBatch> {
if range.is_empty() {
return Ok(RecordBatch::new_empty(Arc::new(
self.schema().as_ref().into(),
)));
}
let projection = if let Some(projection) = projection {
v2::reader::ReaderProjection::from_column_names(self.schema(), projection)?
} else {
Expand Down
71 changes: 71 additions & 0 deletions rust/lance/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1699,6 +1699,7 @@ mod tests {
use lance_arrow::bfloat16::{self, ARROW_EXT_META_KEY, ARROW_EXT_NAME_KEY, BFLOAT16_EXT_NAME};
use lance_datagen::{array, gen, BatchCount, Dimension, RowCount};
use lance_file::version::LanceFileVersion;
use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
use lance_index::{scalar::ScalarIndexParams, vector::DIST_COL, DatasetIndexExt, IndexType};
use lance_linalg::distance::MetricType;
use lance_table::feature_flags;
Expand Down Expand Up @@ -2793,6 +2794,76 @@ mod tests {
.await
}

/// Creating an inverted (full-text search) index on a dataset written from
/// zero record batches must succeed, and a subsequent full-text query must
/// come back with an empty batch.
#[tokio::test]
async fn test_create_fts_index_with_empty_table() {
    let tmp_dir = tempdir().unwrap();
    let uri = tmp_dir.path().to_str().unwrap();

    // One non-nullable UTF-8 column named "text".
    let text_field = ArrowField::new("text", DataType::Utf8, false);
    let schema = Arc::new(ArrowSchema::new(vec![text_field]));

    // The reader yields no batches at all, so nothing but the schema is written.
    let no_batches: Vec<RecordBatch> = Vec::new();
    let source = RecordBatchIterator::new(no_batches.into_iter().map(Ok), schema.clone());
    let mut dataset = Dataset::write(source, uri, None)
        .await
        .expect("write dataset");

    let index_params = InvertedIndexParams::default();
    dataset
        .create_index(&["text"], IndexType::Inverted, None, &index_params, true)
        .await
        .unwrap();

    // Searching the index built over no data must not fail and must match nothing.
    let query = FullTextSearchQuery::new("lance".to_owned());
    let hits = dataset
        .scan()
        .full_text_search(query)
        .unwrap()
        .try_into_batch()
        .await
        .unwrap();
    assert_eq!(hits.num_rows(), 0);
}

/// Creating an inverted (full-text search) index over a column whose rows are
/// all empty strings must succeed, and a full-text query for a term that
/// appears nowhere must come back with an empty batch.
#[tokio::test]
async fn test_create_fts_index_with_empty_strings() {
    let tmp_dir = tempdir().unwrap();
    let uri = tmp_dir.path().to_str().unwrap();

    // One non-nullable UTF-8 column named "text".
    let text_field = ArrowField::new("text", DataType::Utf8, false);
    let schema = Arc::new(ArrowSchema::new(vec![text_field]));

    // A single batch holding three rows, each an empty string.
    let only_batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(StringArray::from(vec!["", "", ""]))],
    )
    .unwrap();
    let all_batches: Vec<RecordBatch> = vec![only_batch];
    let source = RecordBatchIterator::new(all_batches.into_iter().map(Ok), schema.clone());
    let mut dataset = Dataset::write(source, uri, None)
        .await
        .expect("write dataset");

    let index_params = InvertedIndexParams::default();
    dataset
        .create_index(&["text"], IndexType::Inverted, None, &index_params, true)
        .await
        .unwrap();

    // None of the stored strings contains "lance", so no row may be returned.
    let query = FullTextSearchQuery::new("lance".to_owned());
    let hits = dataset
        .scan()
        .full_text_search(query)
        .unwrap()
        .try_into_batch()
        .await
        .unwrap();
    assert_eq!(hits.num_rows(), 0);
}

#[rstest]
#[tokio::test]
async fn test_bad_field_name(
Expand Down
7 changes: 7 additions & 0 deletions rust/lance/src/dataset/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,13 @@ impl Scanner {
query.columns.clone()
};

if columns.is_empty() {
return Err(Error::invalid_input(
"Cannot perform full text search unless an INVERTED index has been created on at least one column".to_string(),
location!(),
));
}

// Now the full text search supports only one column
if columns.len() != 1 {
return Err(Error::invalid_input(
Expand Down

0 comments on commit 8228502

Please sign in to comment.