warn when using _only_ ambiguous cyrillic

This commit is contained in:
Daniel Gultsch 2019-09-16 15:13:53 +02:00
parent d963d95e30
commit 2ec1d0cc09

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2018, Daniel Gultsch All rights reserved. * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, * Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met: * are permitted provided that the following conditions are met:
@ -40,6 +40,8 @@ import android.text.style.ForegroundColorSpan;
import android.util.LruCache; import android.util.LruCache;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -57,6 +59,7 @@ public class IrregularUnicodeDetector {
private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP; private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096); private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
// Code points that findIrregularCodePoints treats as "ambiguous Cyrillic": when every
// code point of a word is contained in this list, the whole set of code points of that
// word is reported as irregular instead of running the usual elimination step.
// Presumably these are Cyrillic letters that are visually confusable with Latin
// letters - TODO confirm.
// NOTE(review): verify that the "q" entry is the intended character (Cyrillic qa,
// U+051B) and not a plain Latin 'q'; the two are hard to tell apart when rendered.
private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("а","г","е","ѕ","і","q","о","р","с","у");
static { static {
Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>(); Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
@ -185,13 +188,41 @@ public class IrregularUnicodeDetector {
private static Set<String> findIrregularCodePoints(String word) { private static Set<String> findIrregularCodePoints(String word) {
Set<String> codePoints; Set<String> codePoints;
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) { if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
codePoints = eliminateFirstAndGetCodePointsCompat(mapCompat(word)); final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
final Set<String> set = asSet(map);
if (containsOnlyAmbiguousCyrillic(set)) {
return set;
}
codePoints = eliminateFirstAndGetCodePointsCompat(map);
} else { } else {
codePoints = eliminateFirstAndGetCodePoints(map(word)); final Map<Character.UnicodeScript, List<String>> map = map(word);
final Set<String> set = asSet(map);
if (containsOnlyAmbiguousCyrillic(set)) {
return set;
}
codePoints = eliminateFirstAndGetCodePoints(map);
} }
return codePoints; return codePoints;
} }
/**
 * Merges all value lists of {@code map} into a single set, dropping duplicates.
 * The keys of the map are ignored.
 *
 * @param map a map whose values are lists of strings
 * @return a new {@link HashSet} containing every string found in any value list
 */
private static Set<String> asSet(Map<?, List<String>> map) {
    final Set<String> merged = new HashSet<>();
    for (Map.Entry<?, List<String>> entry : map.entrySet()) {
        final List<String> group = entry.getValue();
        merged.addAll(group);
    }
    return merged;
}
/**
 * Tells whether every element of {@code codePoints} is listed in
 * {@code AMBIGUOUS_CYRILLIC}. An empty collection yields {@code true}.
 *
 * @param codePoints the code points (as strings) to check
 * @return {@code true} if no element falls outside {@code AMBIGUOUS_CYRILLIC}
 */
private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
    // containsAll performs the same per-element membership test as an explicit loop.
    return AMBIGUOUS_CYRILLIC.containsAll(codePoints);
}
private static PatternTuple find(Jid jid) { private static PatternTuple find(Jid jid) {
synchronized (CACHE) { synchronized (CACHE) {
PatternTuple pattern = CACHE.get(jid); PatternTuple pattern = CACHE.get(jid);