Skip to content

Commit

Permalink
SOLR-17471: upgrade Lucene to version 9.12.1 (#2736)
Browse files Browse the repository at this point in the history
  • Loading branch information
psalagnac authored Jan 6, 2025
1 parent f844b7f commit ee1e48d
Show file tree
Hide file tree
Showing 74 changed files with 141 additions and 129 deletions.
4 changes: 2 additions & 2 deletions dev-docs/lucene-upgrade.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Create a new branch locally e.g. `git checkout -b lucene940 -t origin/main` for

## Build

### `versions.props` update
### `gradle/libs.versions.toml` update

```
- org.apache.lucene:*=9.3.0
Expand All @@ -37,7 +37,7 @@ Create a new branch locally e.g. `git checkout -b lucene940 -t origin/main` for
### `versions.lock` update

```
gradlew --write-locks
gradlew :writeLocks
```

### `solr/licenses` update
Expand Down
2 changes: 1 addition & 1 deletion gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ apache-httpcomponents-httpcore = "4.4.16"
apache-httpcomponents-httpmime = "4.5.14"
apache-kafka = "3.7.1"
apache-log4j = "2.21.0"
apache-lucene = "9.11.1"
apache-lucene = "9.12.1"
apache-opennlp = "1.9.4"
apache-poi = "5.2.2"
apache-rat = "0.15"
Expand Down
2 changes: 1 addition & 1 deletion solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ Bug Fixes

Dependency Upgrades
---------------------
(No changes)
* SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)

Other Changes
---------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec.Mode;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec.Mode;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
Expand Down Expand Up @@ -97,7 +97,7 @@ public void init(NamedList<?> args) {
log.debug("Using default compressionMode: {}", compressionMode);
}
codec =
new Lucene99Codec(compressionMode) {
new Lucene912Codec(compressionMode) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.backup.Checksum;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;

Expand Down Expand Up @@ -217,9 +216,8 @@ default void copyIndexFileFrom(
Directory sourceDir, String sourceFileName, Directory destDir, String destFileName)
throws IOException {
boolean success = false;
try (ChecksumIndexInput is =
sourceDir.openChecksumInput(sourceFileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
IndexOutput os = destDir.createOutput(destFileName, DirectoryFactory.IOCONTEXT_NO_CACHE)) {
try (ChecksumIndexInput is = sourceDir.openChecksumInput(sourceFileName, IOContext.READONCE);
IndexOutput os = destDir.createOutput(destFileName, IOContext.READONCE)) {
os.copyBytes(is, is.length() - CodecUtil.footerLength());

// ensure that index file is not corrupted
Expand Down Expand Up @@ -300,8 +298,8 @@ default void copyFileNoChecksum(
Directory sourceDir, String sourceFileName, Directory destDir, String destFileName)
throws IOException {
boolean success = false;
try (IndexInput is = sourceDir.openInput(sourceFileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
IndexOutput os = destDir.createOutput(destFileName, DirectoryFactory.IOCONTEXT_NO_CACHE)) {
try (IndexInput is = sourceDir.openInput(sourceFileName, IOContext.READONCE);
IndexOutput os = destDir.createOutput(destFileName, IOContext.READONCE)) {
os.copyBytes(is, is.length());
success = true;
} finally {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
* <p>Optional settings:
*
* <ul>
* <li>discountOverlaps (bool): Sets {@link ClassicSimilarity#setDiscountOverlaps(boolean)}
* <li>discountOverlaps (bool): Sets {@link ClassicSimilarity#getDiscountOverlaps()}.
* </ul>
*
* @see TFIDFSimilarity
Expand All @@ -41,13 +41,13 @@ public class ClassicSimilarityFactory extends SimilarityFactory {

/**
* Init param name for specifying the value to use in {@link
* ClassicSimilarity#setDiscountOverlaps(boolean)}
* ClassicSimilarity#getDiscountOverlaps()}.
*/
public static final String DISCOUNT_OVERLAPS = "discountOverlaps";

/**
* Controls the value of {@link ClassicSimilarity#setDiscountOverlaps(boolean)} on newly
* constructed instances of {@link ClassicSimilarity}
* Controls the value of {@link ClassicSimilarity#getDiscountOverlaps()} on newly constructed
* instances of {@link ClassicSimilarity}
*/
protected boolean discountOverlaps = true;

Expand All @@ -59,8 +59,6 @@ public void init(SolrParams params) {

@Override
public Similarity getSimilarity() {
ClassicSimilarity sim = new ClassicSimilarity();
sim.setDiscountOverlaps(discountOverlaps);
return sim;
return new ClassicSimilarity(discountOverlaps);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@
* Optional settings:
*
* <ul>
* <li>discountOverlaps (bool): Sets {@link
* org.apache.lucene.search.similarities.SimilarityBase#setDiscountOverlaps(boolean)}
* <li>discountOverlaps (bool): Sets {@link Similarity#getDiscountOverlaps()}
* </ul>
*
* @lucene.experimental
Expand All @@ -59,9 +58,7 @@ public void init(SolrParams params) {

@Override
public Similarity getSimilarity() {
DFISimilarity sim = new DFISimilarity(independenceMeasure);
sim.setDiscountOverlaps(discountOverlaps);
return sim;
return new DFISimilarity(independenceMeasure, discountOverlaps);
}

private Independence parseIndependenceMeasure(String expr) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
* <p>Optional settings:
*
* <ul>
* <li>discountOverlaps (bool): Sets {@link DFRSimilarity#setDiscountOverlaps(boolean)}
* <li>discountOverlaps (bool): Sets {@link Similarity#getDiscountOverlaps()}
* </ul>
*
* @lucene.experimental
Expand Down Expand Up @@ -160,8 +160,6 @@ static Normalization parseNormalization(String expr, String c, String mu, String

@Override
public Similarity getSimilarity() {
DFRSimilarity sim = new DFRSimilarity(basicModel, afterEffect, normalization);
sim.setDiscountOverlaps(discountOverlaps);
return sim;
return new DFRSimilarity(basicModel, afterEffect, normalization, discountOverlaps);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
* <p>Optional settings:
*
* <ul>
* <li>discountOverlaps (bool): Sets {@link IBSimilarity#setDiscountOverlaps(boolean)}
* <li>discountOverlaps (bool): Sets {@link Similarity#getDiscountOverlaps()}
* </ul>
*
* @lucene.experimental
Expand Down Expand Up @@ -100,8 +100,6 @@ private Lambda parseLambda(String expr) {

@Override
public Similarity getSimilarity() {
IBSimilarity sim = new IBSimilarity(distribution, lambda, normalization);
sim.setDiscountOverlaps(discountOverlaps);
return sim;
return new IBSimilarity(distribution, lambda, normalization, discountOverlaps);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.solr.search.similarities;

import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.LMSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
Expand All @@ -33,7 +34,7 @@
* <p>Optional settings:
*
* <ul>
* <li>discountOverlaps (bool): Sets {@link LMDirichletSimilarity#setDiscountOverlaps(boolean)}
* <li>discountOverlaps (bool): Sets {@link Similarity#getDiscountOverlaps()}
* </ul>
*
* @lucene.experimental
Expand All @@ -51,9 +52,13 @@ public void init(SolrParams params) {

@Override
public Similarity getSimilarity() {
LMDirichletSimilarity sim =
(mu != null) ? new LMDirichletSimilarity(mu) : new LMDirichletSimilarity();
sim.setDiscountOverlaps(discountOverlaps);
return sim;

// Default μ is 2000 in Lucene. Unfortunately, there is no constant we can use
if (mu == null) {
mu = 2000f;
}

LMSimilarity.CollectionModel model = new LMSimilarity.DefaultCollectionModel();
return new LMDirichletSimilarity(model, discountOverlaps, mu);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.solr.search.similarities;

import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.LMSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
Expand All @@ -33,8 +34,7 @@
* <p>Optional settings:
*
* <ul>
* <li>discountOverlaps (bool): Sets {@link
* LMJelinekMercerSimilarity#setDiscountOverlaps(boolean)}
* <li>discountOverlaps (bool): Sets {@link Similarity#getDiscountOverlaps()}
* </ul>
*
* @lucene.experimental
Expand All @@ -52,8 +52,7 @@ public void init(SolrParams params) {

@Override
public Similarity getSimilarity() {
LMJelinekMercerSimilarity sim = new LMJelinekMercerSimilarity(lambda);
sim.setDiscountOverlaps(discountOverlaps);
return sim;
LMSimilarity.CollectionModel model = new LMSimilarity.DefaultCollectionModel();
return new LMJelinekMercerSimilarity(model, discountOverlaps, lambda);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,33 +104,44 @@
* </code> for SVG diagrams showing how the each function behaves with various settings/inputs.
*/
public class SweetSpotSimilarityFactory extends ClassicSimilarityFactory {
private SweetSpotSimilarity sim = null;

private Integer ln_min;
private Integer ln_max;
private Float ln_steep;

private Float hyper_min;
private Float hyper_max;
private Double hyper_base;
private Float hyper_offset;

private Float baseline_base;
private Float baseline_min;

@Override
public void init(SolrParams params) {
super.init(params);

Integer ln_min = params.getInt("lengthNormMin");
Integer ln_max = params.getInt("lengthNormMax");
Float ln_steep = params.getFloat("lengthNormSteepness");
ln_min = params.getInt("lengthNormMin");
ln_max = params.getInt("lengthNormMax");
ln_steep = params.getFloat("lengthNormSteepness");
if (!allOrNoneNull(ln_min, ln_max, ln_steep)) {
throw new SolrException(
SERVER_ERROR,
"Overriding default lengthNorm settings requires all to be specified: lengthNormMin, lengthNormMax, lengthNormSteepness");
}

Float hyper_min = params.getFloat("hyperbolicTfMin");
Float hyper_max = params.getFloat("hyperbolicTfMax");
Double hyper_base = params.getDouble("hyperbolicTfBase");
Float hyper_offset = params.getFloat("hyperbolicTfOffset");
hyper_min = params.getFloat("hyperbolicTfMin");
hyper_max = params.getFloat("hyperbolicTfMax");
hyper_base = params.getDouble("hyperbolicTfBase");
hyper_offset = params.getFloat("hyperbolicTfOffset");
if (!allOrNoneNull(hyper_min, hyper_max, hyper_base, hyper_offset)) {
throw new SolrException(
SERVER_ERROR,
"Overriding default hyperbolicTf settings requires all to be specified: hyperbolicTfMin, hyperbolicTfMax, hyperbolicTfBase, hyperbolicTfOffset");
}

Float baseline_base = params.getFloat("baselineTfBase");
Float baseline_min = params.getFloat("baselineTfMin");
baseline_base = params.getFloat("baselineTfBase");
baseline_min = params.getFloat("baselineTfMin");
if (!allOrNoneNull(baseline_min, baseline_base)) {
throw new SolrException(
SERVER_ERROR,
Expand All @@ -142,13 +153,19 @@ public void init(SolrParams params) {
throw new SolrException(
SERVER_ERROR, "Can not mix hyperbolicTf settings with baselineTf settings");
}
}

@Override
public Similarity getSimilarity() {
// pick Similarity impl based on whether hyper tf settings are set
sim = (null != hyper_min) ? new HyperbolicSweetSpotSimilarity() : new SweetSpotSimilarity();
SweetSpotSimilarity sim =
(null != hyper_min)
? new HyperbolicSweetSpotSimilarity(discountOverlaps)
: new SweetSpotSimilarity(discountOverlaps);

if (null != ln_min) {
// overlaps already handled by super factory
sim.setLengthNormFactors(ln_min, ln_max, ln_steep, this.discountOverlaps);
sim.setLengthNormFactors(ln_min, ln_max, ln_steep);
}

if (null != hyper_min) {
Expand All @@ -158,11 +175,7 @@ public void init(SolrParams params) {
if (null != baseline_min) {
sim.setBaselineTfFactors(baseline_base, baseline_min);
}
}

@Override
public Similarity getSimilarity() {
assert sim != null : "SweetSpotSimilarityFactory was not initialized";
return sim;
}

Expand All @@ -181,6 +194,11 @@ private static boolean allOrNoneNull(Object... args) {
}

private static final class HyperbolicSweetSpotSimilarity extends SweetSpotSimilarity {

private HyperbolicSweetSpotSimilarity(boolean discountOverlaps) {
super(discountOverlaps);
}

@Override
public float tf(float freq) {
return hyperbolicTf(freq);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
as a way to vet that the configuration actually matters.
-->
<fieldType name="string_direct" class="solr.StrField" postingsFormat="Direct" docValuesFormat="Asserting" />
<fieldType name="string_standard" class="solr.StrField" postingsFormat="Lucene99"/>
<fieldType name="string_standard" class="solr.StrField" postingsFormat="Lucene912"/>

<fieldType name="string_disk" class="solr.StrField" />

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec.Mode;
import org.apache.lucene.codecs.lucene912.Lucene912Codec.Mode;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.SegmentInfo;
Expand Down
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-common-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-common-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
86836497e35c1ab33259d9864ceb280c0016075e
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-icu-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-icu-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
abaef4767ad64289e62abdd4606bf6ed2ddea0fd
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-kuromoji-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-kuromoji-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
635c41143b896f402589d29e33695dcfabae9cc5
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-morfologik-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-morfologik-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
d8e4716dab6d829e7b37a8b185cbd242650aeb9e
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-nori-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-nori-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e265410a6a4d9cd23b2e9c73321e6bd307bc1422
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-opennlp-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-opennlp-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
021d892f1946bc238c6e9a651f9446813b715c5a
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-phonetic-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-phonetic-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3787b8edc0cfad21998abc6aeb9d2cbf152b4b26
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-smartcn-9.11.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions solr/licenses/lucene-analysis-smartcn-9.12.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e935f600bf153c46f5725198ca9352c32025f274
1 change: 0 additions & 1 deletion solr/licenses/lucene-analysis-stempel-9.11.1.jar.sha1

This file was deleted.

Loading

0 comments on commit ee1e48d

Please sign in to comment.