001// -------------------------------------------------------------------------------- 002// Copyright 2002-2025 Echo Three, LLC 003// 004// Licensed under the Apache License, Version 2.0 (the "License"); 005// you may not use this file except in compliance with the License. 006// You may obtain a copy of the License at 007// 008// http://www.apache.org/licenses/LICENSE-2.0 009// 010// Unless required by applicable law or agreed to in writing, software 011// distributed under the License is distributed on an "AS IS" BASIS, 012// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013// See the License for the specific language governing permissions and 014// limitations under the License. 015// -------------------------------------------------------------------------------- 016 017package com.echothree.model.control.index.server.analyzer; 018 019import com.echothree.model.control.core.common.EntityAttributeTypes; 020import com.echothree.model.control.core.server.control.CoreControl; 021import com.echothree.model.control.core.server.control.EntityAliasControl; 022import com.echothree.model.control.index.common.IndexFields; 023import com.echothree.model.control.index.server.indexer.IndexerDebugFlags; 024import com.echothree.model.control.party.common.Languages; 025import com.echothree.model.control.tag.server.control.TagControl; 026import com.echothree.model.control.workflow.server.control.WorkflowControl; 027import com.echothree.model.data.core.server.entity.EntityAliasType; 028import com.echothree.model.data.core.server.entity.EntityAttribute; 029import com.echothree.model.data.core.server.entity.EntityType; 030import com.echothree.model.data.party.server.entity.Language; 031import com.echothree.model.data.tag.server.entity.TagScope; 032import com.echothree.util.server.message.ExecutionErrorAccumulator; 033import com.echothree.util.server.persistence.Session; 034import java.util.HashMap; 035import java.util.List; 036import java.util.Map; 037import java.util.Set; 038import org.apache.commons.logging.Log; 039import org.apache.commons.logging.LogFactory; 040import org.apache.lucene.analysis.Analyzer; 041import org.apache.lucene.analysis.AnalyzerWrapper; 042import org.apache.lucene.analysis.cjk.CJKAnalyzer; 043import org.apache.lucene.analysis.de.GermanAnalyzer; 044import org.apache.lucene.analysis.en.EnglishAnalyzer; 045import org.apache.lucene.analysis.es.SpanishAnalyzer; 046import org.apache.lucene.analysis.fr.FrenchAnalyzer; 047import org.apache.lucene.analysis.ja.JapaneseAnalyzer; 048import org.apache.lucene.analysis.standard.StandardAnalyzer; 049 050public class BasicAnalyzer 051 extends AnalyzerWrapper { 052 053 protected Log log = LogFactory.getLog(this.getClass()); 054 055 private ExecutionErrorAccumulator eea; 056 private EntityType entityType; 057 private List<EntityAliasType> entityAliasTypes; 058 private List<EntityAttribute> entityAttributes; 059 private List<TagScope> tagScopes; 060 061 private Analyzer defaultAnalyzer; 062 private Map<String, Analyzer> cachedFieldAnalyzers = null; 063 064 private void init(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType, 065 final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes, 066 final List<TagScope> tagScopes) { 067 this.eea = eea; 068 this.entityType = entityType; 069 this.entityAliasTypes = entityAliasTypes; 070 this.entityAttributes = entityAttributes; 071 this.tagScopes = tagScopes; 072 073 defaultAnalyzer = getDefaultAnalyzer(eea, language); 074 } 075 076 @Override 077 public void close() { 078 super.close(); 079 080 defaultAnalyzer.close(); 081 defaultAnalyzer = null; 082 083 if(cachedFieldAnalyzers != null) { 084 for(var cachedFieldAnalyzer : cachedFieldAnalyzers.values()) { 085 cachedFieldAnalyzer.close(); 086 } 087 cachedFieldAnalyzers = null; 088 } 089 } 090 091 public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType, 092 final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes, 093 final List<TagScope> tagScopes) { 094 super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY); 095 096 init(eea, language, entityType, entityAliasTypes, entityAttributes, tagScopes); 097 } 098 099 public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType) { 100 super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY); 101 102 var coreControl = Session.getModelController(CoreControl.class); 103 var entityAliasControl = Session.getModelController(EntityAliasControl.class); 104 var tagControl = Session.getModelController(TagControl.class); 105 106 init(eea, language, entityType, entityAliasControl.getEntityAliasTypesByEntityType(entityType), 107 coreControl.getEntityAttributesByEntityType(entityType), tagControl.getTagScopesByEntityType(entityType)); 108 } 109 110 @Override 111 protected Analyzer getWrappedAnalyzer(String fieldName) { 112 // Hold a cache of Analyzers. 113 if(cachedFieldAnalyzers == null) { 114 cachedFieldAnalyzers = getFieldAnalyzers(eea, entityType, entityAttributes, tagScopes); 115 } 116 117 var analyzer = cachedFieldAnalyzers.get(fieldName); 118 119 return (analyzer != null) ? analyzer : defaultAnalyzer; 120 } 121 122 @Override 123 public String toString() { 124 return "BasicAnalyzer(" + cachedFieldAnalyzers + ", default=" + defaultAnalyzer + ")"; 125 } 126 127 @SuppressWarnings("resource") // This is taken care of in our close() method. 128 private Analyzer getDefaultAnalyzer(final ExecutionErrorAccumulator eea, final Language language) { 129 Analyzer selectedAnalyzer = null; 130 131 if(language != null) { 132 var languageIsoName = language.getLanguageIsoName(); 133 134 if(languageIsoName.equals(Languages.en.name())) { 135 selectedAnalyzer = new EnglishAnalyzer(); 136 } else if(languageIsoName.equals(Languages.de.name())) { 137 selectedAnalyzer = new GermanAnalyzer(); 138 } else if(languageIsoName.equals(Languages.es.name())) { 139 selectedAnalyzer = new SpanishAnalyzer(); 140 } else if(languageIsoName.equals(Languages.fr.name())) { 141 selectedAnalyzer = new FrenchAnalyzer(); 142 } else if(languageIsoName.equals(Languages.jp.name())) { 143 selectedAnalyzer = new JapaneseAnalyzer(); 144 } else if(languageIsoName.equals(Languages.ko.name()) || languageIsoName.equals(Languages.zh.name())) { 145 selectedAnalyzer = new CJKAnalyzer(); 146 } 147 } 148 149 return selectedAnalyzer == null ? new StandardAnalyzer() : selectedAnalyzer; 150 } 151 152 private Map<String, Analyzer> getEntityAliasesFieldAnalyzers(final List<EntityAliasType> entityAliasTypes, final Map<String, Analyzer> fieldAnalyzers) { 153 entityAliasTypes.stream().map(EntityAliasType::getLastDetail).forEach((entityAliasTypeDetail) -> { 154 var fieldName = entityAliasTypeDetail.getEntityAliasTypeName(); 155 if(IndexerDebugFlags.LogBaseAnalyzer) { 156 log.info("--- fieldName = " + fieldName); 157 } 158 159 fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer()); 160 }); 161 162 return fieldAnalyzers; 163 } 164 165 private Map<String, Analyzer> getEntityAttributeFieldAnalyzers(final List<EntityAttribute> entityAttributes, final Map<String, Analyzer> fieldAnalyzers) { 166 entityAttributes.stream().map(EntityAttribute::getLastDetail).forEach((entityAttributeDetail) -> { 167 var fieldName = entityAttributeDetail.getEntityAttributeName(); 168 var entityAttributeTypeName = entityAttributeDetail.getEntityAttributeType().getEntityAttributeTypeName(); 169 if(IndexerDebugFlags.LogBaseAnalyzer) { 170 log.info("--- fieldName = " + fieldName + ", entityAttributeTypeName = " + entityAttributeTypeName); 171 } 172 173 // EntityAttributeTypes.INTEGER.name() - treated as a NumericField, no Analyzer. 174 // EntityAttributeTypes.LONG.name() - treated as a NumericField, no Analyzer. 175 // EntityAttributeTypes.DATE.name() - treated as a NumericField, no Analyzer. 176 // EntityAttributeTypes.TIME.name() - treated as a NumericField, no Analyzer. 177 // EntityAttributeTypes.STRING.name() - uses default Analyzer. 178 // EntityAttributeTypes.CLOB.name() - uses default Analyzer. 179 // EntityAttributeTypes.GEOPOINT.name() - ignored. 180 if (entityAttributeTypeName.equals(EntityAttributeTypes.BOOLEAN.name()) 181 || entityAttributeTypeName.equals(EntityAttributeTypes.NAME.name()) 182 || entityAttributeTypeName.equals(EntityAttributeTypes.MULTIPLELISTITEM.name()) 183 || entityAttributeTypeName.equals(EntityAttributeTypes.LISTITEM.name())) { 184 // Use the WhitespaceAnalyzer 185 fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer()); 186 } 187 }); 188 189 return fieldAnalyzers; 190 } 191 192 private Map<String, Analyzer> getTagScopeFieldAnalyzers(final List<TagScope> tagScopes, final Map<String, Analyzer> fieldAnalyzers) { 193 tagScopes.forEach(tagScope -> { 194 var tagScopeName = tagScope.getLastDetail().getTagScopeName(); 195 if(IndexerDebugFlags.LogBaseAnalyzer) { 196 log.info("--- fieldName = " + tagScopeName); 197 } 198 fieldAnalyzers.put(tagScopeName, new WhitespaceLowerCaseAnalyzer()); 199 }); 200 201 return fieldAnalyzers; 202 } 203 204 private Map<String, Analyzer> getWorkflowFieldAnalyzers(final EntityType entityType, final Map<String, Analyzer> fieldAnalyzers) { 205 var workflowControl = Session.getModelController(WorkflowControl.class); 206 207 workflowControl.getWorkflowsByEntityType(entityType).forEach(workflow -> { 208 var workflowName = workflow.getLastDetail().getWorkflowName(); 209 if(IndexerDebugFlags.LogBaseAnalyzer) { 210 log.info("--- fieldName = " + workflowName); 211 } 212 fieldAnalyzers.put(workflowName, new WhitespaceLowerCaseAnalyzer()); 213 }); 214 215 return fieldAnalyzers; 216 } 217 218 private Map<String, Analyzer> getAppearanceFieldAnalyzers(final Map<String, Analyzer> fieldAnalyzers) { 219 fieldAnalyzers.put(IndexFields.appearance.name(), new WhitespaceLowerCaseAnalyzer()); 220 221 return fieldAnalyzers; 222 } 223 224 protected Map<String, Analyzer> getEntityTypeFieldAnalyzers(final Map<String, Analyzer> fieldAnalyzers) { 225 // No additional Analyzers by default. 226 227 return fieldAnalyzers; 228 } 229 230 protected Map<String, Analyzer> getFieldAnalyzers(final ExecutionErrorAccumulator eea, final EntityType entityType, 231 final List<EntityAttribute> entityAttributes, final List<TagScope> tagScopes) { 232 return getEntityTypeFieldAnalyzers( 233 getAppearanceFieldAnalyzers( 234 getWorkflowFieldAnalyzers(entityType, 235 getTagScopeFieldAnalyzers(tagScopes, 236 getEntityAliasesFieldAnalyzers(entityAliasTypes, 237 getEntityAttributeFieldAnalyzers(entityAttributes, new HashMap<>()) 238 ) 239 ) 240 ) 241 ) 242 ); 243 } 244 245 public Set<String> getDateFields() { 246 return null; 247 } 248 249 public Set<String> getDateTimeFields() { 250 return null; 251 } 252 253 public Set<String> getIntFields() { 254 return null; 255 } 256 257 public Set<String> getLongFields() { 258 return null; 259 } 260 261 public Set<String> getFloatFields() { 262 return null; 263 } 264 265 public Set<String> getDoubleFields() { 266 return null; 267 } 268 269}