Emoticons: detect keycaps

This commit is contained in:
Daniel Gultsch 2017-09-30 17:38:48 +02:00
parent bbfabaf930
commit 39a54838fa

View file

@ -1,8 +1,13 @@
package eu.siacs.conversations.utils; package eu.siacs.conversations.utils;
import android.util.Log;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.siacs.conversations.Config;
public class Emoticons { public class Emoticons {
private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF); private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF);
@ -12,11 +17,22 @@ public class Emoticons {
private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF); private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF);
private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF); private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF);
private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF); private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF);
private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF);
private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF); private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF);
private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF); private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF);
private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF); private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF);
private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF); private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF);
private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(GEOMETRIC_SHAPES, LATIN_SUPPLEMENT); private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D);
private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139);
private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39));
private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
GEOMETRIC_SHAPES,
LATIN_SUPPLEMENT,
CYK_SYMBOLS_AND_PUNCTUATION,
LETTERLIKE_SYMBOLS,
KEYCAP_COMBINEABLE);
private static final UnicodeBlocks EMOJIS = new UnicodeBlocks( private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
MISC_SYMBOLS_AND_PICTOGRAPHS, MISC_SYMBOLS_AND_PICTOGRAPHS,
SUPPLEMENTAL_SYMBOLS, SUPPLEMENTAL_SYMBOLS,
@ -25,9 +41,11 @@ public class Emoticons {
MISC_SYMBOLS, MISC_SYMBOLS,
DINGBATS, DINGBATS,
ENCLOSED_ALPHANUMERIC_SUPPLEMENT, ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
MISC_TECHNICAL); MISC_TECHNICAL);
private static final int ZWJ = 0x200D; private static final int ZWJ = 0x200D;
private static final int VARIATION_16 = 0xFE0F; private static final int VARIATION_16 = 0xFE0F;
private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF); private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF);
private static List<Symbol> parse(String input) { private static List<Symbol> parse(String input) {
@ -88,22 +106,25 @@ public class Emoticons {
} }
} else { } else {
int previous = codepoints.get(codepoints.size() -1); int previous = codepoints.get(codepoints.size() -1);
if (SYMBOLIZE.contains(previous)) { if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
} else if (SYMBOLIZE.contains(previous)) {
add = codepoint == VARIATION_16; add = codepoint == VARIATION_16;
} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) { } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
add = codepoints.size() == 1; add = codepoints.size() == 1;
} else if (previous == VARIATION_16) { } else if (previous == VARIATION_16) {
add = isMerger(codepoint); add = isMerger(codepoint);
} else if (FITZPATRICK.contains(previous)) { } else if (FITZPATRICK.contains(previous)) {
add = codepoint == ZWJ || EMOJIS.contains(codepoint); add = codepoint == ZWJ;
} else if (ZWJ == previous) { } else if (ZWJ == previous) {
add = EMOJIS.contains(codepoint) || FITZPATRICK.contains(codepoint); add = EMOJIS.contains(codepoint);
} else if (isMerger(codepoint)) { } else if (isMerger(codepoint)) {
add = true; add = true;
} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) { } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
add = true; add = true;
} }
} }
Log.d(Config.LOGTAG,"code point "+String.format("%H",codepoint)+" added="+add);
if (add) { if (add) {
codepoints.add(codepoint); codepoints.add(codepoint);
return true; return true;
@ -119,21 +140,24 @@ public class Emoticons {
public Symbol build() { public Symbol build() {
if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) { if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
return Symbol.NON_EMOJI; return Symbol.NON_EMOJI;
} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
return Symbol.NON_EMOJI;
} }
return codepoints.size() == 0 ? Symbol.NON_EMOJI : Symbol.EMOJI; return codepoints.size() == 0 ? Symbol.NON_EMOJI : Symbol.EMOJI;
} }
} }
public static class UnicodeBlocks { public static class UnicodeBlocks implements UnicodeSet {
final UnicodeRange[] ranges; final UnicodeSet[] unicodeSets;
public UnicodeBlocks(UnicodeRange... ranges) { public UnicodeBlocks(UnicodeSet... sets) {
this.ranges = ranges; this.unicodeSets = sets;
} }
@Override
public boolean contains(int codepoint) { public boolean contains(int codepoint) {
for(UnicodeRange range : ranges) { for(UnicodeSet unicodeSet : unicodeSets) {
if (range.contains(codepoint)) { if (unicodeSet.contains(codepoint)) {
return true; return true;
} }
} }
@ -141,8 +165,26 @@ public class Emoticons {
} }
} }
public interface UnicodeSet {
boolean contains(int codepoint);
}
public static class UnicodeRange { public static class UnicodeList implements UnicodeSet {
private final List<Integer> list;
public UnicodeList(Integer... codes) {
this.list = Arrays.asList(codes);
}
@Override
public boolean contains(int codepoint) {
return this.list.contains(codepoint);
}
}
public static class UnicodeRange implements UnicodeSet {
private final int lower; private final int lower;
private final int upper; private final int upper;