001// --------------------------------------------------------------------------------
002// Copyright 2002-2025 Echo Three, LLC
003//
004// Licensed under the Apache License, Version 2.0 (the "License");
005// you may not use this file except in compliance with the License.
006// You may obtain a copy of the License at
007//
008//     http://www.apache.org/licenses/LICENSE-2.0
009//
010// Unless required by applicable law or agreed to in writing, software
011// distributed under the License is distributed on an "AS IS" BASIS,
012// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013// See the License for the specific language governing permissions and
014// limitations under the License.
015// --------------------------------------------------------------------------------
016
017package com.echothree.model.control.index.server.analyzer;
018
019import com.echothree.model.control.core.common.EntityAttributeTypes;
020import com.echothree.model.control.core.server.control.CoreControl;
021import com.echothree.model.control.core.server.control.EntityAliasControl;
022import com.echothree.model.control.index.common.IndexFields;
023import com.echothree.model.control.index.server.indexer.IndexerDebugFlags;
024import com.echothree.model.control.party.common.Languages;
025import com.echothree.model.control.tag.server.control.TagControl;
026import com.echothree.model.control.workflow.server.control.WorkflowControl;
027import com.echothree.model.data.core.server.entity.EntityAliasType;
028import com.echothree.model.data.core.server.entity.EntityAttribute;
029import com.echothree.model.data.core.server.entity.EntityType;
030import com.echothree.model.data.party.server.entity.Language;
031import com.echothree.model.data.tag.server.entity.TagScope;
032import com.echothree.util.server.message.ExecutionErrorAccumulator;
033import com.echothree.util.server.persistence.Session;
034import java.util.HashMap;
035import java.util.List;
036import java.util.Map;
037import java.util.Set;
038import org.apache.commons.logging.Log;
039import org.apache.commons.logging.LogFactory;
040import org.apache.lucene.analysis.Analyzer;
041import org.apache.lucene.analysis.AnalyzerWrapper;
042import org.apache.lucene.analysis.cjk.CJKAnalyzer;
043import org.apache.lucene.analysis.de.GermanAnalyzer;
044import org.apache.lucene.analysis.en.EnglishAnalyzer;
045import org.apache.lucene.analysis.es.SpanishAnalyzer;
046import org.apache.lucene.analysis.fr.FrenchAnalyzer;
047import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
048import org.apache.lucene.analysis.standard.StandardAnalyzer;
049
050public class BasicAnalyzer
051        extends AnalyzerWrapper {
052    
053    protected Log log = LogFactory.getLog(this.getClass());
054
055    private ExecutionErrorAccumulator eea;
056    private EntityType entityType;
057    private List<EntityAliasType> entityAliasTypes;
058    private List<EntityAttribute> entityAttributes;
059    private List<TagScope> tagScopes;
060
061    private Analyzer defaultAnalyzer;
062    private Map<String, Analyzer> cachedFieldAnalyzers = null;
063
064    private void init(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType,
065            final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes,
066            final List<TagScope> tagScopes) {
067        this.eea = eea;
068        this.entityType = entityType;
069        this.entityAliasTypes = entityAliasTypes;
070        this.entityAttributes = entityAttributes;
071        this.tagScopes = tagScopes;
072
073        defaultAnalyzer = getDefaultAnalyzer(eea, language);
074    }
075
076    @Override
077    public void close() {
078        super.close();
079        
080        defaultAnalyzer.close();
081        defaultAnalyzer = null;
082
083        if(cachedFieldAnalyzers != null) {
084            for(var cachedFieldAnalyzer : cachedFieldAnalyzers.values()) {
085                cachedFieldAnalyzer.close();
086            }
087            cachedFieldAnalyzers = null;
088        }
089    }
090    
091    public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType,
092            final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes,
093            final List<TagScope> tagScopes) {
094        super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY);
095        
096        init(eea, language, entityType, entityAliasTypes, entityAttributes, tagScopes);
097    }
098
099    public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType) {
100        super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY);
101
102        var coreControl = Session.getModelController(CoreControl.class);
103        var entityAliasControl = Session.getModelController(EntityAliasControl.class);
104        var tagControl = Session.getModelController(TagControl.class);
105        
106        init(eea, language, entityType, entityAliasControl.getEntityAliasTypesByEntityType(entityType),
107                coreControl.getEntityAttributesByEntityType(entityType), tagControl.getTagScopesByEntityType(entityType));
108    }
109
110    @Override
111    protected Analyzer getWrappedAnalyzer(String fieldName) {
112        // Hold a cache of Analyzers.
113        if(cachedFieldAnalyzers == null) {
114            cachedFieldAnalyzers = getFieldAnalyzers(eea, entityType, entityAttributes, tagScopes);
115        }
116
117        var analyzer = cachedFieldAnalyzers.get(fieldName);
118        
119        return (analyzer != null) ? analyzer : defaultAnalyzer;
120    }
121
122    @Override
123    public String toString() {
124        return "BasicAnalyzer(" + cachedFieldAnalyzers + ", default=" + defaultAnalyzer + ")";
125    }
126
127    @SuppressWarnings("resource") // This is taken care of in our close() method.
128    private Analyzer getDefaultAnalyzer(final ExecutionErrorAccumulator eea, final Language language) {
129        Analyzer selectedAnalyzer = null;
130        
131        if(language != null) {
132            var languageIsoName = language.getLanguageIsoName();
133            
134            if(languageIsoName.equals(Languages.en.name())) {
135                selectedAnalyzer = new EnglishAnalyzer();
136            } else if(languageIsoName.equals(Languages.de.name())) {
137                selectedAnalyzer = new GermanAnalyzer();
138            } else if(languageIsoName.equals(Languages.es.name())) {
139                selectedAnalyzer = new SpanishAnalyzer();
140            } else if(languageIsoName.equals(Languages.fr.name())) {
141                selectedAnalyzer = new FrenchAnalyzer();
142            } else if(languageIsoName.equals(Languages.jp.name())) {
143                selectedAnalyzer = new JapaneseAnalyzer();
144            } else if(languageIsoName.equals(Languages.ko.name()) || languageIsoName.equals(Languages.zh.name())) {
145                selectedAnalyzer = new CJKAnalyzer();
146            }
147        }
148        
149        return selectedAnalyzer == null ? new StandardAnalyzer() : selectedAnalyzer;
150    }
151
152    private Map<String, Analyzer> getEntityAliasesFieldAnalyzers(final List<EntityAliasType> entityAliasTypes, final Map<String, Analyzer> fieldAnalyzers) {
153        entityAliasTypes.stream().map(EntityAliasType::getLastDetail).forEach((entityAliasTypeDetail) -> {
154            var fieldName = entityAliasTypeDetail.getEntityAliasTypeName();
155            if(IndexerDebugFlags.LogBaseAnalyzer) {
156                log.info("--- fieldName = " + fieldName);
157            }
158
159            fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
160        });
161
162        return fieldAnalyzers;
163    }
164
165    private Map<String, Analyzer> getEntityAttributeFieldAnalyzers(final List<EntityAttribute> entityAttributes, final Map<String, Analyzer> fieldAnalyzers) {
166        entityAttributes.stream().map(EntityAttribute::getLastDetail).forEach((entityAttributeDetail) -> {
167            var fieldName = entityAttributeDetail.getEntityAttributeName();
168            var entityAttributeTypeName = entityAttributeDetail.getEntityAttributeType().getEntityAttributeTypeName();
169            if(IndexerDebugFlags.LogBaseAnalyzer) {
170                log.info("--- fieldName = " + fieldName + ", entityAttributeTypeName = " + entityAttributeTypeName);
171            }
172
173            // EntityAttributeTypes.INTEGER.name() - treated as a NumericField, no Analyzer.
174            // EntityAttributeTypes.LONG.name() - treated as a NumericField, no Analyzer.
175            // EntityAttributeTypes.DATE.name() - treated as a NumericField, no Analyzer.
176            // EntityAttributeTypes.TIME.name() - treated as a NumericField, no Analyzer.
177            // EntityAttributeTypes.STRING.name() - uses default Analyzer.
178            // EntityAttributeTypes.CLOB.name() - uses default Analyzer.
179            // EntityAttributeTypes.GEOPOINT.name() - ignored.
180            if (entityAttributeTypeName.equals(EntityAttributeTypes.BOOLEAN.name())
181                    || entityAttributeTypeName.equals(EntityAttributeTypes.NAME.name())
182                    || entityAttributeTypeName.equals(EntityAttributeTypes.MULTIPLELISTITEM.name())
183                    || entityAttributeTypeName.equals(EntityAttributeTypes.LISTITEM.name())) {
184                // Use the WhitespaceAnalyzer
185                fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
186            }
187        });
188        
189        return fieldAnalyzers;
190    }
191
192    private Map<String, Analyzer> getTagScopeFieldAnalyzers(final List<TagScope> tagScopes, final Map<String, Analyzer> fieldAnalyzers) {
193        tagScopes.forEach(tagScope -> {
194            var tagScopeName = tagScope.getLastDetail().getTagScopeName();
195            if(IndexerDebugFlags.LogBaseAnalyzer) {
196                log.info("--- fieldName = " + tagScopeName);
197            }
198            fieldAnalyzers.put(tagScopeName, new WhitespaceLowerCaseAnalyzer());
199        });
200
201        return fieldAnalyzers;
202    }
203
204    private Map<String, Analyzer> getWorkflowFieldAnalyzers(final EntityType entityType, final Map<String, Analyzer> fieldAnalyzers) {
205        var workflowControl = Session.getModelController(WorkflowControl.class);
206
207        workflowControl.getWorkflowsByEntityType(entityType).forEach(workflow -> {
208            var workflowName = workflow.getLastDetail().getWorkflowName();
209            if(IndexerDebugFlags.LogBaseAnalyzer) {
210                log.info("--- fieldName = " + workflowName);
211            }
212            fieldAnalyzers.put(workflowName, new WhitespaceLowerCaseAnalyzer());
213        });
214
215        return fieldAnalyzers;
216    }
217
218    private Map<String, Analyzer> getAppearanceFieldAnalyzers(final Map<String, Analyzer> fieldAnalyzers) {
219        fieldAnalyzers.put(IndexFields.appearance.name(), new WhitespaceLowerCaseAnalyzer());
220
221        return fieldAnalyzers;
222    }
223
224    protected Map<String, Analyzer> getEntityTypeFieldAnalyzers(final Map<String, Analyzer> fieldAnalyzers) {
225        // No additional Analyzers by default.
226        
227        return fieldAnalyzers;
228    }
229
230    protected Map<String, Analyzer> getFieldAnalyzers(final ExecutionErrorAccumulator eea, final EntityType entityType,
231            final List<EntityAttribute> entityAttributes, final List<TagScope> tagScopes) {
232        return getEntityTypeFieldAnalyzers(
233                getAppearanceFieldAnalyzers(
234                        getWorkflowFieldAnalyzers(entityType,
235                                getTagScopeFieldAnalyzers(tagScopes,
236                                        getEntityAliasesFieldAnalyzers(entityAliasTypes,
237                                                getEntityAttributeFieldAnalyzers(entityAttributes, new HashMap<>())
238                                        )
239                                )
240                        )
241                )
242        );
243    }
244
245    public Set<String> getDateFields() {
246        return null;
247    }
248
249    public Set<String> getDateTimeFields() {
250        return null;
251    }
252
253    public Set<String> getIntFields() {
254        return null;
255    }
256
257    public Set<String> getLongFields() {
258        return null;
259    }
260
261    public Set<String> getFloatFields() {
262        return null;
263    }
264
265    public Set<String> getDoubleFields() {
266        return null;
267    }
268
269}