// --------------------------------------------------------------------------------
// Copyright 2002-2024 Echo Three, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// --------------------------------------------------------------------------------

package com.echothree.model.control.index.server.analysis;

import com.echothree.model.control.core.common.EntityAttributeTypes;
import com.echothree.model.control.core.server.control.CoreControl;
import com.echothree.model.control.index.common.IndexFields;
import com.echothree.model.control.index.server.indexer.IndexerDebugFlags;
import com.echothree.model.control.party.common.Languages;
import com.echothree.model.control.tag.server.control.TagControl;
import com.echothree.model.control.workflow.server.control.WorkflowControl;
import com.echothree.model.data.core.server.entity.EntityAliasType;
import com.echothree.model.data.core.server.entity.EntityAttribute;
import com.echothree.model.data.core.server.entity.EntityType;
import com.echothree.model.data.party.server.entity.Language;
import com.echothree.model.data.tag.server.entity.TagScope;
import com.echothree.util.server.message.ExecutionErrorAccumulator;
import com.echothree.util.server.persistence.Session;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * An {@link AnalyzerWrapper} that picks an {@link Analyzer} per index field.
 *
 * <p>Fields named after the entity type's alias types, tag scopes and workflows, the
 * {@code appearance} field, and entity attributes of type BOOLEAN, NAME, MULTIPLELISTITEM or
 * LISTITEM are analyzed with a {@link WhitespaceLowerCaseAnalyzer}. Every other field falls back
 * to a default Analyzer chosen from the {@link Language} supplied at construction.</p>
 *
 * <p>Subclasses may contribute further per-field Analyzers by overriding
 * {@link #getEntityTypeAnalyzers(Map)}.</p>
 */
public class BasicAnalyzer
        extends AnalyzerWrapper {

    protected Log log = LogFactory.getLog(this.getClass());

    private ExecutionErrorAccumulator eea;
    private EntityType entityType;
    private List<EntityAliasType> entityAliasTypes;
    private List<EntityAttribute> entityAttributes;
    private List<TagScope> tagScopes;

    /** Analyzer used for any field without an entry in {@link #cachedFieldAnalyzers}. */
    private Analyzer defaultAnalyzer;

    /** Field name to Analyzer; built on the first call to {@link #getWrappedAnalyzer(String)}. */
    private Map<String, Analyzer> cachedFieldAnalyzers = null;

    /**
     * Shared constructor body: stores the inputs needed to build the per-field Analyzers later
     * and creates the default Analyzer immediately.
     */
    private void init(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType,
            final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes,
            final List<TagScope> tagScopes) {
        this.eea = eea;
        this.entityType = entityType;
        this.entityAliasTypes = entityAliasTypes;
        this.entityAttributes = entityAttributes;
        this.tagScopes = tagScopes;

        defaultAnalyzer = getDefaultAnalyzer(eea, language);
    }

    /**
     * Closes this wrapper, the default Analyzer and every cached per-field Analyzer.
     *
     * <p>Calling this method more than once is safe; later calls find nothing left to close.</p>
     */
    @Override
    public void close() {
        super.close();

        // Guarded so that a repeated close() does not dereference the field nulled out below.
        if(defaultAnalyzer != null) {
            defaultAnalyzer.close();
            defaultAnalyzer = null;
        }

        if(cachedFieldAnalyzers != null) {
            cachedFieldAnalyzers.values().forEach(Analyzer::close);
            cachedFieldAnalyzers = null;
        }
    }

    /**
     * Creates an analyzer from explicitly supplied alias types, attributes and tag scopes.
     *
     * @param eea accumulator stored on the instance; not consulted by the code in this class
     * @param language selects the default Analyzer; {@code null} selects {@link StandardAnalyzer}
     * @param entityType entity type whose workflows contribute field names
     * @param entityAliasTypes alias types whose names become fields
     * @param entityAttributes attributes whose names may become fields, depending on their type
     * @param tagScopes tag scopes whose names become fields
     */
    public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType,
            final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes,
            final List<TagScope> tagScopes) {
        super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY);

        init(eea, language, entityType, entityAliasTypes, entityAttributes, tagScopes);
    }

    /**
     * Creates an analyzer, looking up the alias types, attributes and tag scopes for
     * {@code entityType} through {@link CoreControl} and {@link TagControl}.
     *
     * @param eea accumulator stored on the instance; not consulted by the code in this class
     * @param language selects the default Analyzer; {@code null} selects {@link StandardAnalyzer}
     * @param entityType entity type used for all lookups
     */
    public BasicAnalyzer(final ExecutionErrorAccumulator eea, final Language language, final EntityType entityType) {
        super(AnalyzerWrapper.PER_FIELD_REUSE_STRATEGY);

        var coreControl = Session.getModelController(CoreControl.class);
        var tagControl = Session.getModelController(TagControl.class);

        init(eea, language, entityType, coreControl.getEntityAliasTypesByEntityType(entityType),
                coreControl.getEntityAttributesByEntityType(entityType), tagControl.getTagScopesByEntityType(entityType));
    }

    /**
     * Returns the Analyzer registered for {@code fieldName}, or the default Analyzer when the
     * field has no dedicated entry.
     */
    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        // The map is built on first use rather than in the constructor.
        // NOTE(review): this lazy initialization is not synchronized - confirm that an instance
        // is never shared between threads before its first use.
        if(cachedFieldAnalyzers == null) {
            cachedFieldAnalyzers = getFieldAnalyzers(eea, entityType, entityAliasTypes, entityAttributes, tagScopes);
        }

        var fieldAnalyzer = cachedFieldAnalyzers.get(fieldName);

        return fieldAnalyzer == null ? defaultAnalyzer : fieldAnalyzer;
    }

    /** Returns the wrapped Analyzer's components unchanged. */
    @Override
    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        return components;
    }

    @Override
    public String toString() {
        return "BasicAnalyzer(" + cachedFieldAnalyzers + ", default=" + defaultAnalyzer + ")";
    }

    /**
     * Chooses the default Analyzer from the language's ISO name.
     *
     * @param eea not consulted; kept so the signature mirrors the other builder methods
     * @param language language to match, may be {@code null}
     * @return a language-specific Analyzer for en, de, es, fr, jp, ko and zh; otherwise a
     *         {@link StandardAnalyzer}
     */
    private Analyzer getDefaultAnalyzer(final ExecutionErrorAccumulator eea, final Language language) {
        if(language == null) {
            return new StandardAnalyzer();
        }

        final String languageIsoName = language.getLanguageIsoName();

        if(languageIsoName.equals(Languages.en.name())) {
            return new EnglishAnalyzer();
        }
        if(languageIsoName.equals(Languages.de.name())) {
            return new GermanAnalyzer();
        }
        if(languageIsoName.equals(Languages.es.name())) {
            return new SpanishAnalyzer();
        }
        if(languageIsoName.equals(Languages.fr.name())) {
            return new FrenchAnalyzer();
        }
        if(languageIsoName.equals(Languages.jp.name())) {
            return new JapaneseAnalyzer();
        }
        if(languageIsoName.equals(Languages.ko.name()) || languageIsoName.equals(Languages.zh.name())) {
            return new CJKAnalyzer();
        }

        return new StandardAnalyzer();
    }

    /**
     * Logs the field name when {@link IndexerDebugFlags#LogBaseAnalyzer} is set, then registers a
     * new {@link WhitespaceLowerCaseAnalyzer} for it, replacing any earlier entry.
     */
    private void putWhitespaceLowerCaseAnalyzer(final String fieldName, final Map<String, Analyzer> fieldAnalyzers) {
        if(IndexerDebugFlags.LogBaseAnalyzer) {
            log.info("--- fieldName = " + fieldName);
        }

        fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
    }

    /** Registers one Analyzer per entity alias type, keyed by the alias type's name. */
    private Map<String, Analyzer> getEntityAliasesFieldAnalyzers(final List<EntityAliasType> entityAliasTypes, final Map<String, Analyzer> fieldAnalyzers) {
        for(var entityAliasType : entityAliasTypes) {
            putWhitespaceLowerCaseAnalyzer(entityAliasType.getLastDetail().getEntityAliasTypeName(), fieldAnalyzers);
        }

        return fieldAnalyzers;
    }

    /**
     * Registers an Analyzer for each entity attribute whose type needs one, keyed by the
     * attribute's name. Attributes of any other type get no entry.
     */
    private Map<String, Analyzer> getEntityAttributeFieldAnalyzers(final List<EntityAttribute> entityAttributes, final Map<String, Analyzer> fieldAnalyzers) {
        for(var entityAttribute : entityAttributes) {
            var entityAttributeDetail = entityAttribute.getLastDetail();
            var fieldName = entityAttributeDetail.getEntityAttributeName();
            var entityAttributeTypeName = entityAttributeDetail.getEntityAttributeType().getEntityAttributeTypeName();

            if(IndexerDebugFlags.LogBaseAnalyzer) {
                log.info("--- fieldName = " + fieldName + ", entityAttributeTypeName = " + entityAttributeTypeName);
            }

            // EntityAttributeTypes.INTEGER.name() - treated as a NumericField, no Analyzer.
            // EntityAttributeTypes.LONG.name() - treated as a NumericField, no Analyzer.
            // EntityAttributeTypes.DATE.name() - treated as a NumericField, no Analyzer.
            // EntityAttributeTypes.TIME.name() - treated as a NumericField, no Analyzer.
            // EntityAttributeTypes.STRING.name() - uses default Analyzer.
            // EntityAttributeTypes.CLOB.name() - uses default Analyzer.
            // EntityAttributeTypes.GEOPOINT.name() - ignored.
            var needsWhitespaceLowerCaseAnalyzer = entityAttributeTypeName.equals(EntityAttributeTypes.BOOLEAN.name())
                    || entityAttributeTypeName.equals(EntityAttributeTypes.NAME.name())
                    || entityAttributeTypeName.equals(EntityAttributeTypes.MULTIPLELISTITEM.name())
                    || entityAttributeTypeName.equals(EntityAttributeTypes.LISTITEM.name());

            if(needsWhitespaceLowerCaseAnalyzer) {
                // Use the WhitespaceLowerCaseAnalyzer
                fieldAnalyzers.put(fieldName, new WhitespaceLowerCaseAnalyzer());
            }
        }

        return fieldAnalyzers;
    }

    /** Registers one Analyzer per tag scope, keyed by the tag scope's name. */
    private Map<String, Analyzer> getTagScopeFieldAnalyzers(final List<TagScope> tagScopes, final Map<String, Analyzer> fieldAnalyzers) {
        for(var tagScope : tagScopes) {
            putWhitespaceLowerCaseAnalyzer(tagScope.getLastDetail().getTagScopeName(), fieldAnalyzers);
        }

        return fieldAnalyzers;
    }

    /**
     * Registers one Analyzer per workflow returned by {@link WorkflowControl} for
     * {@code entityType}, keyed by the workflow's name.
     */
    private Map<String, Analyzer> getWorkflowFieldAnalyzers(final EntityType entityType, final Map<String, Analyzer> fieldAnalyzers) {
        var workflowControl = Session.getModelController(WorkflowControl.class);

        for(var workflow : workflowControl.getWorkflowsByEntityType(entityType)) {
            putWhitespaceLowerCaseAnalyzer(workflow.getLastDetail().getWorkflowName(), fieldAnalyzers);
        }

        return fieldAnalyzers;
    }

    /** Registers the Analyzer for the {@code appearance} field. */
    private Map<String, Analyzer> getAppearanceFieldAnalyzers(final Map<String, Analyzer> fieldAnalyzers) {
        fieldAnalyzers.put(IndexFields.appearance.name(), new WhitespaceLowerCaseAnalyzer());

        return fieldAnalyzers;
    }

    /**
     * Extension point for subclasses to register Analyzers specific to their entity type. It is
     * invoked last, after all the built-in registrations, and its return value becomes the cache.
     *
     * @param fieldAnalyzers the map populated so far
     * @return the map to use for field lookups; this implementation returns the argument as is
     */
    protected Map<String, Analyzer> getEntityTypeAnalyzers(final Map<String, Analyzer> fieldAnalyzers) {
        // No additional Analyzers by default.

        return fieldAnalyzers;
    }

    /**
     * Builds the field name to Analyzer map. Later registrations overwrite earlier ones for the
     * same field name, so the order below matters: entity attributes, entity alias types, tag
     * scopes, workflows, appearance, and finally the subclass hook.
     *
     * @param eea not consulted; kept so the signature mirrors the other builder methods
     */
    private Map<String, Analyzer> getFieldAnalyzers(final ExecutionErrorAccumulator eea, final EntityType entityType,
            final List<EntityAliasType> entityAliasTypes, final List<EntityAttribute> entityAttributes,
            final List<TagScope> tagScopes) {
        final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();

        getEntityAttributeFieldAnalyzers(entityAttributes, fieldAnalyzers);
        getEntityAliasesFieldAnalyzers(entityAliasTypes, fieldAnalyzers);
        getTagScopeFieldAnalyzers(tagScopes, fieldAnalyzers);
        getWorkflowFieldAnalyzers(entityType, fieldAnalyzers);
        getAppearanceFieldAnalyzers(fieldAnalyzers);

        return getEntityTypeAnalyzers(fieldAnalyzers);
    }

}