Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop name from TokenizerFactory #24869

Merged
merged 1 commit into from
May 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.cluster.routing.ShardsIterator;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.settings.Settings;
Expand Down Expand Up @@ -179,15 +180,16 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal

} else if (request.tokenizer() != null) {
final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
TokenizerFactory tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, analysisRegistry, environment);
Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers,
analysisRegistry, environment);

TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
tokenFilterFactories = getTokenFilterFactories(request, indexSettings, analysisRegistry, environment, tokenFilterFactories);

CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
charFilterFactories = getCharFilterFactories(request, indexSettings, analysisRegistry, environment, charFilterFactories);

analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(), charFilterFactories, tokenFilterFactories);
closeAnalyzer = true;
} else if (analyzer == null) {
if (indexAnalyzers == null) {
Expand Down Expand Up @@ -325,7 +327,8 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
}
}
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(
customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
Expand Down Expand Up @@ -551,8 +554,9 @@ private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest reque
return tokenFilterFactories;
}

private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers,
private static Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers,
AnalysisRegistry analysisRegistry, Environment environment) throws IOException {
String name;
TokenizerFactory tokenizerFactory;
final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer();
// parse anonymous settings
Expand All @@ -568,6 +572,7 @@ private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, In
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]");
}
// Need to set anonymous "name" of tokenizer
name = "_anonymous_tokenizer";
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
} else {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
Expand All @@ -576,18 +581,20 @@ private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, In
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
}
name = tokenizer.name;
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings());
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
}
name = tokenizer.name;
tokenizerFactory = tokenizerFactoryFactory.get(indexAnalzyers.getIndexSettings(), environment, tokenizer.name,
AnalysisRegistry.getSettingsFromIndexSettings(indexAnalzyers.getIndexSettings(),
AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
}
}
return tokenizerFactory;
return new Tuple<>(name, tokenizerFactory);
}

private static IndexSettings getNaIndexSettings(Settings settings) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,14 @@
import org.elasticsearch.index.IndexSettings;

public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory {

private final String name;

protected final Version version;


public AbstractTokenizerFactory(IndexSettings indexSettings, String name, Settings settings) {
// TODO drop `String ignored` in a followup
public AbstractTokenizerFactory(IndexSettings indexSettings, String ignored, Settings settings) {
super(indexSettings);
this.name = name;
this.version = Analysis.parseAnalysisVersion(this.indexSettings.getSettings(), settings, logger);
}

@Override
public String name() {
return this.name;
}

public final Version version() {
return version;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

public final class CustomAnalyzer extends Analyzer {

private final String tokenizerName;
private final TokenizerFactory tokenizerFactory;

private final CharFilterFactory[] charFilters;
Expand All @@ -36,19 +37,27 @@ public final class CustomAnalyzer extends Analyzer {
private final int positionIncrementGap;
private final int offsetGap;

public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters) {
this(tokenizerFactory, charFilters, tokenFilters, 0, -1);
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
TokenFilterFactory[] tokenFilters) {
this(tokenizerName, tokenizerFactory, charFilters, tokenFilters, 0, -1);
}

public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters,
int positionIncrementGap, int offsetGap) {
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) {
this.tokenizerName = tokenizerName;
this.tokenizerFactory = tokenizerFactory;
this.charFilters = charFilters;
this.tokenFilters = tokenFilters;
this.positionIncrementGap = positionIncrementGap;
this.offsetGap = offsetGap;
}

/**
* The name of the tokenizer as configured by the user.
*/
public String getTokenizerName() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this? The TokenizerFactory.name() was never used here before.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was used in the analyze action.

return tokenizerName;
}

public TokenizerFactory tokenizerFactory() {
return tokenizerFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public void build(final Map<String, TokenizerFactory> tokenizers, final Map<Stri
positionIncrementGap = analyzerSettings.getAsInt("position_increment_gap", positionIncrementGap);

int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);;
this.customAnalyzer = new CustomAnalyzer(tokenizer,
this.customAnalyzer = new CustomAnalyzer(tokenizerName, tokenizer,
charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()]),
positionIncrementGap,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public void build(final Map<String, CharFilterFactory> charFilters, final Map<St
}

this.customAnalyzer = new CustomAnalyzer(
"keyword",
PreBuiltTokenizers.KEYWORD.getTokenizerFactory(indexSettings.getIndexVersionCreated()),
charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,6 @@ private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, Multi
protected TokenizerFactory create(Version version) {
if (multiTermComponent != null) {
return new MultiTermAwareTokenizerFactory() {
@Override
public String name() {
return getName();
}

@Override
public Tokenizer create() {
return create.apply(version);
Expand All @@ -112,17 +107,7 @@ public Object getMultiTermComponent() {
}
};
} else {
return new TokenizerFactory() {
@Override
public String name() {
return getName();
}

@Override
public Tokenizer create() {
return create.apply(version);
}
};
return () -> create.apply(version);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@

import org.apache.lucene.analysis.Tokenizer;

public interface TokenizerFactory {

String name();

/**
 * Factory for {@link Tokenizer} instances used when building analyzers.
 * After this change the factory no longer exposes a {@code name()}; callers
 * that need the configured name track it separately (e.g. alongside the
 * factory in a tuple, or on {@code CustomAnalyzer}).
 */
public interface TokenizerFactory { // TODO replace with Supplier<Tokenizer>
    /** Creates a new, un-reset {@link Tokenizer} instance. */
    Tokenizer create();
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

import java.util.Locale;

public enum PreBuiltTokenizers {

STANDARD(CachingStrategy.ONE) {
Expand Down Expand Up @@ -148,14 +146,8 @@ private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, Multi
public synchronized TokenizerFactory getTokenizerFactory(final Version version) {
TokenizerFactory tokenizerFactory = cache.get(version);
if (tokenizerFactory == null) {
final String finalName = name().toLowerCase(Locale.ROOT);
if (getMultiTermComponent(version) != null) {
tokenizerFactory = new MultiTermAwareTokenizerFactory() {
@Override
public String name() {
return finalName;
}

@Override
public Tokenizer create() {
return PreBuiltTokenizers.this.create(version);
Expand All @@ -168,11 +160,6 @@ public Object getMultiTermComponent() {
};
} else {
tokenizerFactory = new TokenizerFactory() {
@Override
public String name() {
return finalName;
}

@Override
public Tokenizer create() {
return PreBuiltTokenizers.this.create(version);
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

Loading