Another set of changes to URL detection

This commit is contained in:
Marvin W 2020-01-09 13:43:02 +01:00
parent b2af8c5112
commit 5e1f646cbc
No known key found for this signature in database
GPG key ID: 072E9235DB996F2A

View file

@ -6,9 +6,10 @@ using Xmpp;
namespace Dino.Ui.Util { namespace Dino.Ui.Util {
private static Regex url_regex; private static Regex URL_REGEX;
private static Map<unichar, unichar> matching_chars; private static Map<unichar, unichar> MATCHING_CHARS;
private const string[] allowed_schemes = {"http", "https", "ftp", "ftps", "irc", "ircs", "xmpp", "mailto", "sms", "smsto", "mms", "tel", "geo", "openpgp4fpr", "im", "news", "nntp", "sip", "ssh", "bitcoim", "sftp", "magnet", "vnc"}; private const unichar[] NON_TRAILING_CHARS = {'\'', '"', ',', '.', ';', '!', '?', '»', '”', '’', '`', '~', '‽'};
private const string[] ALLOWED_SCHEMAS = {"http", "https", "ftp", "ftps", "irc", "ircs", "xmpp", "mailto", "sms", "smsto", "mms", "tel", "geo", "openpgp4fpr", "im", "news", "nntp", "sip", "ssh", "bitcoin", "sftp", "magnet", "vnc"};
private const string[] tango_colors_light = {"FCE94F", "FCAF3E", "E9B96E", "8AE234", "729FCF", "AD7FA8", "EF2929"}; private const string[] tango_colors_light = {"FCE94F", "FCAF3E", "E9B96E", "8AE234", "729FCF", "AD7FA8", "EF2929"};
private const string[] tango_colors_medium = {"EDD400", "F57900", "C17D11", "73D216", "3465A4", "75507B", "CC0000"}; private const string[] tango_colors_medium = {"EDD400", "F57900", "C17D11", "73D216", "3465A4", "75507B", "CC0000"};
private const string[] material_colors_800 = {"D32F2F", "C2185B", "7B1FA2", "512DA8", "303F9F", "1976D2", "0288D1", "0097A7", "00796B", "388E3C", "689F38", "AFB42B", "FFA000", "F57C00", "E64A19", "5D4037"}; private const string[] material_colors_800 = {"D32F2F", "C2185B", "7B1FA2", "512DA8", "303F9F", "1976D2", "0288D1", "0097A7", "00796B", "388E3C", "689F38", "AFB42B", "FFA000", "F57C00", "E64A19", "5D4037"};
@ -251,20 +252,20 @@ public static bool is_24h_format() {
} }
/**
 * Returns the shared regex used to locate URLs in text, creating it on the
 * first call and reusing the cached instance afterwards.
 *
 * Each line below shows the pre-change code followed by the post-change code
 * of this commit. The old pattern rejected the listed punctuation characters
 * unless they were followed by a non-whitespace character; the new pattern
 * accepts any run of non-whitespace after a recognised scheme prefix.
 *
 * @return the cached regex instance
 */
public static Regex get_url_regex() { public static Regex get_url_regex() {
// Build the regex only when the cache field is still unset.
if (url_regex == null) { if (URL_REGEX == null) {
url_regex = /\b(((http|ftp)s?:\/\/|(ircs?|xmpp|mailto|sms|smsto|mms|tel|geo|openpgp4fpr|im|news|nntp|sip|ssh|bitcoin|sftp|magnet|vnc|urn):)([^\s,.;!?"'»”’]|[,.;!?"'»]\S)+)/; URL_REGEX = /\b(((http|ftp)s?:\/\/|(ircs?|xmpp|mailto|sms|smsto|mms|tel|geo|openpgp4fpr|im|news|nntp|sip|ssh|bitcoin|sftp|magnet|vnc|urn):)\S+)/;
} }
return url_regex; return URL_REGEX;
} }
/**
 * Returns the shared map from closing bracket characters to their opening
 * counterparts, creating it on the first call.
 *
 * Keys are ')', ']' and '}'; the values are '(', '[' and '{' respectively.
 * Each line below shows the pre-change code followed by the post-change
 * code; within this function the commit only renames the backing field
 * to MATCHING_CHARS.
 *
 * @return the cached map
 */
public static Map<unichar, unichar> get_matching_chars() { public static Map<unichar, unichar> get_matching_chars() {
// Populate the map only when the cache field is still unset.
if (matching_chars == null) { if (MATCHING_CHARS == null) {
matching_chars = new HashMap<unichar, unichar>(); MATCHING_CHARS = new HashMap<unichar, unichar>();
matching_chars[")".get_char(0)] = "(".get_char(0); MATCHING_CHARS[")".get_char(0)] = "(".get_char(0);
matching_chars["]".get_char(0)] = "[".get_char(0); MATCHING_CHARS["]".get_char(0)] = "[".get_char(0);
matching_chars["}".get_char(0)] = "{".get_char(0); MATCHING_CHARS["}".get_char(0)] = "{".get_char(0);
} }
return matching_chars; return MATCHING_CHARS;
} }
public static string parse_add_markup(string s_, string? highlight_word, bool parse_links, bool parse_text_markup, bool already_escaped_ = false) { public static string parse_add_markup(string s_, string? highlight_word, bool parse_links, bool parse_text_markup, bool already_escaped_ = false) {
@ -278,10 +279,12 @@ public static string parse_add_markup(string s_, string? highlight_word, bool pa
int start, end; int start, end;
match_info.fetch_pos(0, out start, out end); match_info.fetch_pos(0, out start, out end);
string link = s[start:end]; string link = s[start:end];
if (GLib.Uri.parse_scheme(link) in ALLOWED_SCHEMAS) {
Map<unichar, unichar> matching_chars = get_matching_chars(); Map<unichar, unichar> matching_chars = get_matching_chars();
unichar close_char; unichar close_char;
int last_char_index = link.length; int last_char_index = link.length;
while (link.get_prev_char(ref last_char_index, out close_char) && matching_chars.has_key(close_char)) { while (link.get_prev_char(ref last_char_index, out close_char)) {
if (matching_chars.has_key(close_char)) {
unichar open_char = matching_chars[close_char]; unichar open_char = matching_chars[close_char];
unichar char; unichar char;
int index = 0; int index = 0;
@ -300,9 +303,15 @@ public static string parse_add_markup(string s_, string? highlight_word, bool pa
} else { } else {
break; break;
} }
} else if (close_char in NON_TRAILING_CHARS) {
// Remove last char from url
end -= close_char.to_string().length;
link = s[start:end];
} else {
break;
}
} }
if (GLib.Uri.parse_scheme(link) in allowed_schemes) {
return parse_add_markup(s[0:start], highlight_word, parse_links, parse_text_markup, already_escaped) + return parse_add_markup(s[0:start], highlight_word, parse_links, parse_text_markup, already_escaped) +
"<a href=\"" + Markup.escape_text(link) + "\">" + parse_add_markup(link, highlight_word, false, false, already_escaped) + "</a>" + "<a href=\"" + Markup.escape_text(link) + "\">" + parse_add_markup(link, highlight_word, false, false, already_escaped) + "</a>" +
parse_add_markup(s[end:s.length], highlight_word, parse_links, parse_text_markup, already_escaped); parse_add_markup(s[end:s.length], highlight_word, parse_links, parse_text_markup, already_escaped);