001// --------------------------------------------------------------------------------
002// Copyright 2002-2024 Echo Three, LLC
003//
004// Licensed under the Apache License, Version 2.0 (the "License");
005// you may not use this file except in compliance with the License.
006// You may obtain a copy of the License at
007//
008//     http://www.apache.org/licenses/LICENSE-2.0
009//
010// Unless required by applicable law or agreed to in writing, software
011// distributed under the License is distributed on an "AS IS" BASIS,
012// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013// See the License for the specific language governing permissions and
014// limitations under the License.
015// --------------------------------------------------------------------------------
016
017package com.echothree.model.control.index.server.analysis;
018
019import com.echothree.model.control.core.common.EntityAttributeTypes;
020import com.echothree.model.control.core.server.control.CoreControl;
021import com.echothree.model.control.index.common.IndexFields;
022import com.echothree.model.control.index.server.indexer.IndexerDebugFlags;
023import com.echothree.model.control.party.common.Languages;
024import com.echothree.model.control.tag.server.control.TagControl;
025import com.echothree.model.control.workflow.server.control.WorkflowControl;
026import com.echothree.model.data.core.server.entity.EntityAliasType;
027import com.echothree.model.data.core.server.entity.EntityAttribute;
028import com.echothree.model.data.core.server.entity.EntityType;
029import com.echothree.model.data.party.server.entity.Language;
030import com.echothree.model.data.tag.server.entity.TagScope;
031import com.echothree.util.server.message.ExecutionErrorAccumulator;
032import com.echothree.util.server.persistence.Session;
033import java.util.HashMap;
034import java.util.List;
035import java.util.Map;
036import org.apache.commons.logging.Log;
037import org.apache.commons.logging.LogFactory;
038import org.apache.lucene.analysis.Analyzer;
039import org.apache.lucene.analysis.AnalyzerWrapper;
040import org.apache.lucene.analysis.cjk.CJKAnalyzer;
041import org.apache.lucene.analysis.de.GermanAnalyzer;
042import org.apache.lucene.analysis.en.EnglishAnalyzer;
043import org.apache.lucene.analysis.es.SpanishAnalyzer;
044import org.apache.lucene.analysis.fr.FrenchAnalyzer;
045import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
046import org.apache.lucene.analysis.standard.StandardAnalyzer;
047
048public class BasicAnalyzer
049        extends AnalyzerWrapper {
050    
051    protected Log log = LogFactory.getLog(this.getClass());
052
053    private ExecutionErrorAccumulator eea;
054    private EntityType entityType;
055    private List<EntityAliasType> entityAliasTypes;
056    private List<EntityAttribute> entityAttributes;
057    private List<TagScope> tagScopes;
058
059    private Analyzer defaultAnalyzer;
060    private Map<String, Analyzer> cachedFieldAnalyzers = null;
061
062    private void init(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType,
063            final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes,
064            final List<TagScope> tagScopes) {
065        this.eea = eea;
066        this.entityType = entityType;
067        this.entityAliasTypes = entityAliasTypes;
068        this.entityAttributes = entityAttributes;
069        this.tagScopes = tagScopes;
070
071        defaultAnalyzer = getDefaultAnalyzer(eea, language);
072    }
073
074    @Override
075    public void close() {
076        super.close();
077        
078        defaultAnalyzer.close();
079        defaultAnalyzer = null;
080
081        if(cachedFieldAnalyzers != null) {
082            for(var cachedFieldAnalyzer : cachedFieldAnalyzers.values()) {
083                cachedFieldAnalyzer.close();
084            }
085            cachedFieldAnalyzers = null;
086        }
087    }
088    
089    public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType,
090            final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes,
091            final List<TagScope> tagScopes) {
092        super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY);
093        
094        init(eea, language, entityType, entityAliasTypes, entityAttributes, tagScopes);
095    }
096
097    public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType) {
098        super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY);
099        
100        var coreControl = Session.getModelController(CoreControl.class);
101        var tagControl = Session.getModelController(TagControl.class);
102        
103        init(eea, language, entityType, coreControl.getEntityAliasTypesByEntityType(entityType),
104                coreControl.getEntityAttributesByEntityType(entityType), tagControl.getTagScopesByEntityType(entityType));
105    }
106
107    @Override
108    protected Analyzer getWrappedAnalyzer(String fieldName) {
109        // Hold a cache of Analyzers.
110        if(cachedFieldAnalyzers == null) {
111            cachedFieldAnalyzers = getFieldAnalyzers(eea, entityType, entityAttributes, tagScopes);
112        }
113
114        var analyzer = cachedFieldAnalyzers.get(fieldName);
115        
116        return (analyzer != null) ? analyzer : defaultAnalyzer;
117    }
118
119    @Override
120    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
121        return components;
122    }
123
124    @Override
125    public String toString() {
126        return "BasicAnalyzer(" + cachedFieldAnalyzers + ", default=" + defaultAnalyzer + ")";
127    }
128
129    private Analyzer getDefaultAnalyzer(final ExecutionErrorAccumulator eea, final Language language) {
130        Analyzer selectedAnalyzer = null;
131        
132        if(language != null) {
133            String languageIsoName = language.getLanguageIsoName();
134            
135            if(languageIsoName.equals(Languages.en.name())) {
136                selectedAnalyzer = new EnglishAnalyzer();
137            } else if(languageIsoName.equals(Languages.de.name())) {
138                selectedAnalyzer = new GermanAnalyzer();
139            } else if(languageIsoName.equals(Languages.es.name())) {
140                selectedAnalyzer = new SpanishAnalyzer();
141            } else if(languageIsoName.equals(Languages.fr.name())) {
142                selectedAnalyzer = new FrenchAnalyzer();
143            } else if(languageIsoName.equals(Languages.jp.name())) {
144                selectedAnalyzer = new JapaneseAnalyzer();
145            } else if(languageIsoName.equals(Languages.ko.name()) || languageIsoName.equals(Languages.zh.name())) {
146                selectedAnalyzer = new CJKAnalyzer();
147            }
148        }
149        
150        return selectedAnalyzer == null ? new StandardAnalyzer() : selectedAnalyzer;
151    }
152
153    private Map<String, Analyzer> getEntityAliasesFieldAnalyzers(final List<EntityAliasType> entityAliasTypes, final Map<String, Analyzer> fieldAnalyzers) {
154        entityAliasTypes.stream().map(EntityAliasType::getLastDetail).forEach((entityAliasTypeDetail) -> {
155            var fieldName = entityAliasTypeDetail.getEntityAliasTypeName();
156            if(IndexerDebugFlags.LogBaseAnalyzer) {
157                log.info("--- fieldName = " + fieldName);
158            }
159
160            fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
161        });
162
163        return fieldAnalyzers;
164    }
165
166    private Map<String, Analyzer> getEntityAttributeFieldAnalyzers(final List<EntityAttribute> entityAttributes, final Map<String, Analyzer> fieldAnalyzers) {
167        entityAttributes.stream().map(EntityAttribute::getLastDetail).forEach((entityAttributeDetail) -> {
168            String fieldName = entityAttributeDetail.getEntityAttributeName();
169            String entityAttributeTypeName = entityAttributeDetail.getEntityAttributeType().getEntityAttributeTypeName();
170            if(IndexerDebugFlags.LogBaseAnalyzer) {
171                log.info("--- fieldName = " + fieldName + ", entityAttributeTypeName = " + entityAttributeTypeName);
172            }
173
174            // EntityAttributeTypes.INTEGER.name() - treated as a NumericField, no Analyzer.
175            // EntityAttributeTypes.LONG.name() - treated as a NumericField, no Analyzer.
176            // EntityAttributeTypes.DATE.name() - treated as a NumericField, no Analyzer.
177            // EntityAttributeTypes.TIME.name() - treated as a NumericField, no Analyzer.
178            // EntityAttributeTypes.STRING.name() - uses default Analyzer.
179            // EntityAttributeTypes.CLOB.name() - uses default Analyzer.
180            // EntityAttributeTypes.GEOPOINT.name() - ignored.
181            if (entityAttributeTypeName.equals(EntityAttributeTypes.BOOLEAN.name())
182                    || entityAttributeTypeName.equals(EntityAttributeTypes.NAME.name())
183                    || entityAttributeTypeName.equals(EntityAttributeTypes.MULTIPLELISTITEM.name())
184                    || entityAttributeTypeName.equals(EntityAttributeTypes.LISTITEM.name())) {
185                // Use the WhitespaceAnalyzer
186                fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
187            }
188        });
189        
190        return fieldAnalyzers;
191    }
192
193    private Map<String, Analyzer> getTagScopeFieldAnalyzers(final List<TagScope> tagScopes, final Map<String, Analyzer> fieldAnalyzers) {
194        tagScopes.stream().map((tagScope) -> tagScope.getLastDetail().getTagScopeName()).map((fieldName) -> {
195            if(IndexerDebugFlags.LogBaseAnalyzer) {
196                log.info("--- fieldName = " + fieldName);
197            }
198            return fieldName;
199        }).forEach((fieldName) -> {
200            fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
201        });
202        
203        return fieldAnalyzers;
204    }
205
206    private Map<String, Analyzer> getWorkflowFieldAnalyzers(final EntityType entityType, final Map<String, Analyzer> fieldAnalyzers) {
207        var workflowControl = Session.getModelController(WorkflowControl.class);
208
209        workflowControl.getWorkflowsByEntityType(entityType).stream().map((workflow) -> workflow.getLastDetail().getWorkflowName()).map((fieldName) -> {
210            if(IndexerDebugFlags.LogBaseAnalyzer) {
211                log.info("--- fieldName = " + fieldName);
212            }
213            return fieldName;
214        }).forEach((fieldName) -> {
215            fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
216        });
217        
218        return fieldAnalyzers;
219    }
220
221    private Map<String, Analyzer> getAppearanceFieldAnalyzers(final Map<String, Analyzer> fieldAnalyzers) {
222        fieldAnalyzers.put(IndexFields.appearance.name(), new WhitespaceLowerCaseAnalyzer());
223
224        return fieldAnalyzers;
225    }
226
227    protected Map<String, Analyzer> getEntityTypeAnalyzers(final Map<String, Analyzer> fieldAnalyzers) {
228        // No additional Analyzers by default.
229        
230        return fieldAnalyzers;
231    }
232
233    private Map<String, Analyzer> getFieldAnalyzers(final ExecutionErrorAccumulator eea, final EntityType entityType,
234            final List<EntityAttribute> entityAttributes, final List<TagScope> tagScopes) {
235        return getEntityTypeAnalyzers(getAppearanceFieldAnalyzers(getWorkflowFieldAnalyzers(entityType,
236                getTagScopeFieldAnalyzers(tagScopes, getEntityAliasesFieldAnalyzers(entityAliasTypes,
237                        getEntityAttributeFieldAnalyzers(entityAttributes, new HashMap<>()))))));
238    }
239    
240}