001// -------------------------------------------------------------------------------- 002// Copyright 2002-2024 Echo Three, LLC 003// 004// Licensed under the Apache License, Version 2.0 (the "License"); 005// you may not use this file except in compliance with the License. 006// You may obtain a copy of the License at 007// 008// http://www.apache.org/licenses/LICENSE-2.0 009// 010// Unless required by applicable law or agreed to in writing, software 011// distributed under the License is distributed on an "AS IS" BASIS, 012// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013// See the License for the specific language governing permissions and 014// limitations under the License. 015// -------------------------------------------------------------------------------- 016 017package com.echothree.util.common.string; 018 019import com.echothree.model.control.party.common.choice.NameSuffixChoicesBean; 020import com.echothree.model.control.party.common.choice.PersonalTitleChoicesBean; 021import com.google.common.base.Splitter; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Locale; 026import java.util.Map; 027 028public class BaseNameCleaner { 029 030 private StringUtils stringUtils = StringUtils.getInstance(); 031 032 private final Map<String, String> personalTitles = new HashMap<>(); 033 private final Map<String, String> personalTitlesOriginal = new HashMap<>(); 034 private int maxPersonalTitleSpaces = 0; 035 036 protected String cleanStringForTitleOrSuffix(String str) { 037 return stringUtils.cleanString(str, true, true, true).toLowerCase(Locale.getDefault()); 038 } 039 040 public Map<String, String> getPersonalTitles() { 041 return personalTitles; 042 } 043 044 public Map<String, String> getPersonalTitlesOriginal() { 045 return personalTitlesOriginal; 046 } 047 048 private final Map<String, String> nameSuffixes = new HashMap<>(); 049 private final Map<String, String> nameSuffixesOriginal = new HashMap<>(); 050 private int maxNameSuffixSpaces = 0; 051 052 public Map<String, String> getNameSuffixes() { 053 return nameSuffixes; 054 } 055 056 public Map<String, String> getNameSuffixesOriginal() { 057 return nameSuffixesOriginal; 058 } 059 060 protected void setupPersonalTitles(PersonalTitleChoicesBean personalTitleChoices) { 061 var valueIter = personalTitleChoices.getValues().iterator(); 062 var labelIter = personalTitleChoices.getLabels().iterator(); 063 064 while(valueIter.hasNext()) { 065 var originalLabel = labelIter.next(); 066 var label = cleanStringForTitleOrSuffix(originalLabel); 067 var value = valueIter.next(); 068 var spaceCount = stringUtils.countSpaces(label); 069 070 personalTitles.put(label, value); 071 personalTitlesOriginal.put(value, originalLabel); 072 073 if(spaceCount > maxPersonalTitleSpaces) { 074 maxPersonalTitleSpaces = spaceCount; 075 } 076 } 077 } 078 079 protected void setupNameSuffixes(NameSuffixChoicesBean nameSuffixChoices) { 080 var valueIter = nameSuffixChoices.getValues().iterator(); 081 var labelIter = nameSuffixChoices.getLabels().iterator(); 082 083 while(valueIter.hasNext()) { 084 var originalLabel = labelIter.next(); 085 var label = cleanStringForTitleOrSuffix(originalLabel); 086 var value = valueIter.next(); 087 var spaceCount = stringUtils.countSpaces(label); 088 089 nameSuffixes.put(label, value); 090 nameSuffixesOriginal.put(value, originalLabel); 091 092 if(spaceCount > maxNameSuffixSpaces) { 093 maxNameSuffixSpaces = spaceCount; 094 } 095 } 096 } 097 098 private List<String> iterableToList(Iterable<String> pieces) { 099 var list = new ArrayList<String>(); 100 101 for(var str : pieces) { 102 list.add(str); 103 } 104 105 return list; 106 } 107 108 private static final Splitter SpaceSplitter = Splitter.on(' ') 109 .trimResults() 110 .omitEmptyStrings(); 111 112 private static final int MaximumFirstNameLength = 20; 113 private static final int MaximumMiddleNameLength = 20; 114 private static final int MaximumLastNameLength = 20; 115 116 public NameResult getCleansedName(final String str) { 117 String personalTitleChoice = null; 118 int personalTitlePieces; 119 String firstName = null; 120 String middleName = null; 121 String lastName = null; 122 String nameSuffixChoice = null; 123 int nameSuffixPieces; 124 125 // 1) Break apart str into a List at any space character. 126 var pieces = iterableToList(SpaceSplitter.split(str)); 127 var piecesSize = pieces.size(); 128 var startingIndex = 0; 129 var endingIndex = piecesSize - 1; 130 131 // 2) Find the longest (in words) personal title. 132 for(var i = Math.min(maxPersonalTitleSpaces, endingIndex - startingIndex) ; i >= startingIndex ; i--) { 133 var personalTitle = new StringBuilder(); 134 135 for(int j = 0 ; j <= i ; j++) { 136 if(j > 0) { 137 personalTitle.append(' '); 138 } 139 140 personalTitle.append(pieces.get(startingIndex + j)); 141 } 142 143 personalTitleChoice = personalTitles.get(cleanStringForTitleOrSuffix(personalTitle.toString())); 144 145 if(personalTitleChoice != null) { 146 personalTitlePieces = i - startingIndex + 1; 147 startingIndex += personalTitlePieces; 148 break; 149 } 150 } 151 152 // 3) Find the longest (in words) name suffix. 153 if(startingIndex <= endingIndex) { 154 for(var i = Math.min(endingIndex == 0 ? 0 : endingIndex - maxNameSuffixSpaces, endingIndex - startingIndex) ; i <= endingIndex ; i++) { 155 var nameSuffix = new StringBuilder(); 156 157 for(int j = i ; j <= endingIndex ; j++) { 158 if(j > i) { 159 nameSuffix.append(' '); 160 } 161 162 nameSuffix.append(pieces.get(j)); 163 } 164 165 nameSuffixChoice = nameSuffixes.get(cleanStringForTitleOrSuffix(nameSuffix.toString())); 166 167 if(nameSuffixChoice != null) { 168 nameSuffixPieces = endingIndex - i + 1; 169 endingIndex -= nameSuffixPieces; 170 break; 171 } 172 } 173 } 174 175 // 4) Deal with commas and flipped names ("Harms, Richard"). 176 if(startingIndex <= endingIndex) { 177 var firstPiece = pieces.get(startingIndex); 178 179 if(firstPiece.endsWith(",") && firstPiece.length() > 1) { 180 // If the first piece ends with a comma, assume they put their last name first, and flip things around. 181 for(var i = startingIndex + 1 ; i <= endingIndex ; i++) { 182 pieces.set(i - 1, pieces.get(i)); 183 } 184 185 pieces.set(endingIndex, firstPiece); 186 } 187 188 // Trim all trailing commas, and remove any new empty elements. 189 for(var i = startingIndex ; i <= endingIndex ; i++) { 190 var element = pieces.get(i); 191 192 if(element.endsWith(",")) { 193 element = element.substring(0, element.length() - 1); 194 195 if(element.length() == 0) { 196 pieces.remove(i); 197 endingIndex--; 198 199 if(pieces.isEmpty()) { 200 break; 201 } else { 202 i--; 203 } 204 } else { 205 pieces.set(i, element); 206 } 207 } 208 } 209 } 210 211 // 5) Pick out bits of a name. 212 if(startingIndex <= endingIndex) { 213 var done = false; 214 215 if(personalTitleChoice != null && startingIndex - endingIndex == 0) { 216 // Mr. Harms 217 lastName = pieces.get(startingIndex); 218 done = true; 219 } else { 220 if(nameSuffixChoice != null && startingIndex - endingIndex == 0) { 221 // Richard Jr. 222 firstName = pieces.get(startingIndex); 223 done = true; 224 } else { 225 // Richard 226 if(personalTitleChoice == null && nameSuffixChoice == null && piecesSize == 1) { 227 firstName = pieces.get(startingIndex); 228 done = true; 229 } 230 } 231 } 232 233 if(!done) { 234 firstName = pieces.get(startingIndex); 235 startingIndex++; 236 237 if(startingIndex == endingIndex) { 238 lastName = pieces.get(startingIndex); 239 } else { 240 StringBuilder sb = new StringBuilder(); 241 242 for(int i = startingIndex ; i < endingIndex ; i++) { 243 String toAppend = pieces.get(i); 244 245 if(i != startingIndex) { 246 sb.append(' '); 247 } 248 249 sb.append(stringUtils.isAllSameCase(toAppend) ? stringUtils.normalizeCase(toAppend) : toAppend); 250 } 251 252 middleName = sb.toString(); 253 lastName = pieces.get(endingIndex); 254 } 255 } 256 } 257 258 // X) If any pieces of the name are all upper-case, or are all lower-case, fix them. 259 if(stringUtils.isAllSameCase(firstName)) { 260 firstName = stringUtils.normalizeCase(firstName); 261 } 262 263 if(stringUtils.isAllSameCase(lastName)) { 264 lastName = stringUtils.normalizeCase(lastName); 265 } 266 267 return new NameResult(personalTitleChoice, 268 stringUtils.left(firstName, MaximumFirstNameLength), 269 stringUtils.left(middleName, MaximumMiddleNameLength), 270 stringUtils.left(lastName, MaximumLastNameLength), 271 nameSuffixChoice); 272 } 273 274}