warn when using _only_ ambiguous cyrillic
This commit is contained in:
parent
d963d95e30
commit
2ec1d0cc09
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018, Daniel Gultsch All rights reserved.
|
* Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without modification,
|
* Redistribution and use in source and binary forms, with or without modification,
|
||||||
* are permitted provided that the following conditions are met:
|
* are permitted provided that the following conditions are met:
|
||||||
|
@ -40,6 +40,8 @@ import android.text.style.ForegroundColorSpan;
|
||||||
import android.util.LruCache;
|
import android.util.LruCache;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -57,6 +59,7 @@ public class IrregularUnicodeDetector {
|
||||||
|
|
||||||
private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
|
private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
|
||||||
private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
|
private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
|
||||||
|
// Cyrillic letters that look like Latin letters. A word whose code points are all taken from
// this list is reported as irregular in its entirety (see containsOnlyAmbiguousCyrillic).
// The sixth entry is CYRILLIC SMALL LETTER QA (U+051B), written as an escape on purpose:
// it used to be the Latin letter q (U+0071), which made pure-Latin words such as "qq" match.
private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("а","г","е","ѕ","і","\u051B","о","р","с","у");
|
||||||
|
|
||||||
static {
|
static {
|
||||||
Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
|
Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
|
||||||
|
@ -185,13 +188,41 @@ public class IrregularUnicodeDetector {
|
||||||
// Diff hunk: every statement is shown first in its pre-commit form and then in its post-commit form.
// Post-commit behaviour: below API level N the *Compat helpers (mapCompat /
// eliminateFirstAndGetCodePointsCompat) are used, otherwise map / eliminateFirstAndGetCodePoints.
// In both branches the grouped code points are first flattened with asSet; if all of them are
// listed in AMBIGUOUS_CYRILLIC the flattened set is returned as-is, otherwise the result of the
// eliminateFirst* helper is returned.
private static Set<String> findIrregularCodePoints(String word) {
|
private static Set<String> findIrregularCodePoints(String word) {
|
||||||
Set<String> codePoints;
|
Set<String> codePoints;
|
||||||
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
|
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
|
||||||
codePoints = eliminateFirstAndGetCodePointsCompat(mapCompat(word));
|
final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
|
||||||
|
final Set<String> set = asSet(map);
|
||||||
|
// Early exit: the whole word is returned, bypassing eliminateFirstAndGetCodePointsCompat.
if (containsOnlyAmbiguousCyrillic(set)) {
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
codePoints = eliminateFirstAndGetCodePointsCompat(map);
|
||||||
} else {
|
} else {
|
||||||
codePoints = eliminateFirstAndGetCodePoints(map(word));
|
final Map<Character.UnicodeScript, List<String>> map = map(word);
|
||||||
|
final Set<String> set = asSet(map);
|
||||||
|
// Same early exit as in the branch above, for the UnicodeScript-keyed map.
if (containsOnlyAmbiguousCyrillic(set)) {
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
codePoints = eliminateFirstAndGetCodePoints(map);
|
||||||
}
|
}
|
||||||
return codePoints;
|
return codePoints;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Set<String> asSet(Map<?, List<String>> map) {
|
||||||
|
final Set<String> flat = new HashSet<>();
|
||||||
|
for(List<String> value : map.values()) {
|
||||||
|
flat.addAll(value);
|
||||||
|
}
|
||||||
|
return flat;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
|
||||||
|
for (String codePoint : codePoints) {
|
||||||
|
if (!AMBIGUOUS_CYRILLIC.contains(codePoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private static PatternTuple find(Jid jid) {
|
private static PatternTuple find(Jid jid) {
|
||||||
synchronized (CACHE) {
|
synchronized (CACHE) {
|
||||||
PatternTuple pattern = CACHE.get(jid);
|
PatternTuple pattern = CACHE.get(jid);
|
||||||
|
|
Loading…
Reference in a new issue