diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs
new file mode 100644
index 0000000000..b72ec02e0d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs
@@ -0,0 +1,97 @@
+// Lucene version compatibility level 8.2.0
+// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful.
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+#nullable enable
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Adds the <see cref="ITypeAttribute.Type"/> as a synonym,
+    /// i.e. another token at the same position, optionally with a specified prefix prepended.
+    /// </summary>
+    public sealed class TypeAsSynonymFilter : TokenFilter
+    {
+        private readonly ICharTermAttribute termAtt;
+        private readonly ITypeAttribute typeAtt;
+        private readonly IPositionIncrementAttribute posIncrAtt;
+        private readonly string? prefix;
+
+        private State? savedToken = null; // state captured from the last input token; non-null while its type synonym is still pending
+
+        /// <summary>
+        /// Initializes a new instance of <see cref="TypeAsSynonymFilter"/> with
+        /// the specified token stream.
+        /// </summary>
+        /// <param name="input">Input token stream.</param>
+        public TypeAsSynonymFilter(TokenStream input)
+            : this(input, null)
+        {
+        }
+
+        /// <summary>
+        /// Initializes a new instance of <see cref="TypeAsSynonymFilter"/> with
+        /// the specified token stream and prefix.
+        /// </summary>
+        /// <param name="input">Input token stream.</param>
+        /// <param name="prefix">Prepend this string to every token type emitted as token text.
+        /// If <c>null</c>, nothing will be prepended.</param>
+        public TypeAsSynonymFilter(TokenStream input, string? prefix)
+            : base(input)
+        {
+            this.prefix = prefix;
+            termAtt = AddAttribute<ICharTermAttribute>();
+            typeAtt = AddAttribute<ITypeAttribute>();
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        /// <summary>Passes each input token through, then emits one extra token at the same position whose text is the (optionally prefixed) type of that token.</summary>
+        public override bool IncrementToken()
+        {
+            if (savedToken != null)
+            {
+                // Emit last token's type at the same position
+                RestoreState(savedToken);
+                savedToken = null;
+                termAtt.SetEmpty();
+                if (prefix != null)
+                {
+                    termAtt.Append(prefix);
+                }
+                termAtt.Append(typeAtt.Type);
+                posIncrAtt.PositionIncrement = 0;
+                return true;
+            }
+            else if (m_input.IncrementToken())
+            {
+                // No pending token type to emit
+                savedToken = CaptureState();
+                return true;
+            }
+            return false;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            savedToken = null; // discard any synonym still pending from the previous use of this stream
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilterFactory.cs
new file mode 100644
index 0000000000..462be60bd7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilterFactory.cs
@@ -0,0 +1,62 @@
+// Lucene version compatibility level 8.2.0
+// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful.
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+#nullable enable
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Factory for <see cref="TypeAsSynonymFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_type_as_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.TypeAsSynonymFilterFactory" prefix="_type_" /&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    ///
+    /// <para/>
+    /// If the optional <c>prefix</c> parameter is used, the specified value will be prepended
+    /// to the type, e.g. with prefix = "_type_", for a token "example.com" with type "&lt;URL&gt;",
+    /// the emitted synonym will have text "_type_&lt;URL&gt;".
+    /// </summary>
+    public class TypeAsSynonymFilterFactory : TokenFilterFactory
+    {
+        private readonly string? prefix;
+        /// <summary>Creates the factory, reading the optional <c>prefix</c> argument; throws <see cref="ArgumentException"/> if <paramref name="args"/> is non-empty afterwards.</summary>
+        public TypeAsSynonymFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            prefix = Get(args, "prefix"); // default value is null
+            if (args.Count > 0)
+            {
+                throw new ArgumentException(string.Format(J2N.Text.StringFormatter.CurrentCulture, "Unknown parameters: {0}", args));
+            }
+        }
+        /// <summary>Wraps <paramref name="input"/> in a <see cref="TypeAsSynonymFilter"/> that uses the configured prefix.</summary>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new TypeAsSynonymFilter(input, prefix);
+        }
+    }
+}
diff --git a/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs b/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs
index e7d787d041..824eaec95d 100644
--- a/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs
+++ b/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.TokenAttributes;
namespace Lucene.Net.Analysis
{
@@ -31,6 +31,7 @@ public sealed class CannedTokenStream : TokenStream
private readonly IPositionLengthAttribute posLengthAtt;
private readonly IOffsetAttribute offsetAtt;
private readonly IPayloadAttribute payloadAtt;
+ private readonly ITypeAttribute typeAtt; // LUCENENET specific - See IncrementToken()
private readonly int finalOffset;
private readonly int finalPosInc;
@@ -49,6 +50,7 @@ public CannedTokenStream(int finalPosInc, int finalOffset, params Token[] tokens
posLengthAtt = AddAttribute();
offsetAtt = AddAttribute();
payloadAtt = AddAttribute();
+ typeAtt = AddAttribute(); // LUCENENET specific - See IncrementToken()
this.tokens = tokens;
this.finalOffset = finalOffset;
@@ -76,6 +78,12 @@ public override bool IncrementToken()
posLengthAtt.PositionLength = token.PositionLength;
offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
payloadAtt.Payload = token.Payload;
+
+ // LUCENENET: This change is from https://github.com/apache/lucene/commit/72eaeab7151d421a28ecec1634b8c48599e524f5.
+ // We need it for the TestTypeAsSynonymFilterFactory tests to pass (from lucene 8.2.0).
+ // But we don't yet have all of the PackedTokenAttributeImpl plumbing it takes to do it the way they did,
+ // so setting it explicitly as a workaround.
+ typeAtt.Type = token.Type;
return true;
}
else
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs
new file mode 100644
index 0000000000..08f7e396ef
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs
@@ -0,0 +1,54 @@
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+    /// <summary>Checks that the filter created by the "TypeAsSynonym" factory emits each token's type as a synonym at the same position.</summary>
+    public class TestTypeAsSynonymFilterFactory : BaseTokenStreamFactoryTestCase
+    {
+        private static readonly Token[] TOKENS = { token("Visit", "<ALPHANUM>"), token("example.com", "<URL>") }; // distinct non-empty types so the emitted synonyms are observable
+
+        [Test]
+        public void TestBasic()
+        {
+            TokenStream stream = new CannedTokenStream(TOKENS);
+            stream = TokenFilterFactory("TypeAsSynonym").Create(stream);
+            AssertTokenStreamContents(stream, new string[] { "Visit", "<ALPHANUM>", "example.com", "<URL>" },
+                null, null, new string[] { "<ALPHANUM>", "<ALPHANUM>", "<URL>", "<URL>" }, new int[] { 1, 0, 1, 0 });
+        }
+
+        [Test]
+        public void TestPrefix()
+        {
+            TokenStream stream = new CannedTokenStream(TOKENS);
+            stream = TokenFilterFactory("TypeAsSynonym", "prefix", "_type_").Create(stream);
+            AssertTokenStreamContents(stream, new string[] { "Visit", "_type_<ALPHANUM>", "example.com", "_type_<URL>" },
+                null, null, new string[] { "<ALPHANUM>", "<ALPHANUM>", "<URL>", "<URL>" }, new int[] { 1, 0, 1, 0 });
+        }
+
+        private static Token token(string term, string type) // builds a fixture token with the given text and type
+        {
+            Token token = new Token();
+            token.SetEmpty();
+            token.Append(term);
+            token.Type = type;
+            return token;
+        }
+    }
+}