From c0aea75c163e1c28f22d102666f6a97441973041 Mon Sep 17 00:00:00 2001 From: David Kaspar Date: Tue, 23 Apr 2024 17:16:53 +0200 Subject: [PATCH 1/2] added test for html entity numbers to regular expression added a few common html entities and entity numbers --- .../amethyst/commons/preview/MetaTagsParser.kt | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt b/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt index 75b9e7325..047625140 100644 --- a/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt +++ b/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt @@ -130,7 +130,7 @@ object MetaTagsParser { // - commonly used character references in attribute values are resolved private class Attrs { companion object { - val RE_CHAR_REF = Regex("""&(\w+)(;?)""") + val RE_CHAR_REF = Regex("""&(#?)(\w+)(;?)""") val BASE_CHAR_REFS = mapOf( "amp" to "&", @@ -148,16 +148,25 @@ object MetaTagsParser { "equals" to "=", "grave" to "`", "DiacriticalGrave" to "`", + "039" to "'", + "8217" to "’", + "8216" to "‘", + "39" to "'", + "ldquo" to "“", + "rdquo" to "”", + "mdash" to "—", + "hellip" to "…", + "x27" to "'", ) fun replaceCharRefs(match: MatchResult): String { - val bcr = BASE_CHAR_REFS[match.groupValues[1]] + val bcr = BASE_CHAR_REFS[match.groupValues[2]] if (bcr != null) { return bcr } // non-base char refs must be terminated by ';' if (match.groupValues[2].isNotEmpty()) { - val cr = CHAR_REFS[match.groupValues[1]] + val cr = CHAR_REFS[match.groupValues[2]] if (cr != null) { return cr } From 94af0eb220753e6b84469d0647a02cfbbc102c3a Mon Sep 17 00:00:00 2001 From: David Kaspar Date: Tue, 23 Apr 2024 19:59:12 +0200 Subject: [PATCH 2/2] incremented matching group due to added matching group added nbsp entity --- .../vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt b/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt index 047625140..68fba9e94 100644 --- a/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt +++ b/commons/src/main/java/com/vitorpamplona/amethyst/commons/preview/MetaTagsParser.kt @@ -141,6 +141,8 @@ object MetaTagsParser { "LT" to "<", "gt" to ">", "GT" to ">", + "nbsp" to " ", + "NBSP" to " ", ) val CHAR_REFS = mapOf( @@ -157,6 +159,7 @@ object MetaTagsParser { "mdash" to "—", "hellip" to "…", "x27" to "'", + "nbsp" to " ", ) fun replaceCharRefs(match: MatchResult): String { @@ -165,7 +168,7 @@ object MetaTagsParser { return bcr } // non-base char refs must be terminated by ';' - if (match.groupValues[2].isNotEmpty()) { + if (match.groupValues[3].isNotEmpty()) { val cr = CHAR_REFS[match.groupValues[2]] if (cr != null) { return cr