From e0f0b9d1bf9c04ae13ee02305fb22aa2b054ba68 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 19 Dec 2024 21:43:35 -0500 Subject: [PATCH] truncation --- src/apps/web/WebData.h | 8 ++------ src/apps/web/WebUtils.h | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/apps/web/WebData.h b/src/apps/web/WebData.h index 431ba5a..da2fbc4 100644 --- a/src/apps/web/WebData.h +++ b/src/apps/web/WebData.h @@ -71,7 +71,7 @@ struct User { } if (username.size() == 0) username = to_hex(pubkey.substr(0,4)); - if (username.size() > 50) username = username.substr(0, 50) + "..."; + abbrevText(username, 50); } std::optional loadKindJson(lmdb::txn &txn, Decompressor &decomp, uint64_t kind) { @@ -280,11 +280,7 @@ struct Event { // If it was only a URL, just use raw URL if (content.size() == 0 || std::all_of(content.begin(), content.end(), [](unsigned char c){ return std::isspace(c); })) content = firstUrl; - auto textAbbrev = [](std::string &str, size_t maxLen){ - if (str.size() > maxLen) str = str.substr(0, maxLen-3) + "..."; - }; - - textAbbrev(content, 100); + abbrevText(content, 100); templarInternal::htmlEscape(content, true); output.text = std::move(content); diff --git a/src/apps/web/WebUtils.h b/src/apps/web/WebUtils.h index dde7575..ee86a9c 100644 --- a/src/apps/web/WebUtils.h +++ b/src/apps/web/WebUtils.h @@ -110,3 +110,29 @@ inline std::string stripUrls(std::string &content) { std::swap(output, content); return firstUrl; } + +inline void abbrevText(std::string &origStr, size_t maxLen) { + if (maxLen < 10) throw herr("abbrev too short"); + if (origStr.size() <= maxLen) return; + + std::string str = origStr.substr(0, maxLen-3); + + { + // If string ends in a multi-byte UTF-8 encoded code-point, chop it off. + // This avoids cutting in the middle of an encoded code-point. It's a 99% + // solution, not perfect. See: https://metacpan.org/pod/Unicode::Truncate + + auto endsInUtf8Extension = [&](){ + return str.size() && (str.back() & 0b1100'0000) == 0b1000'0000; + }; + + if (endsInUtf8Extension()) { + do str.pop_back(); while (endsInUtf8Extension()); + if (str.size()) str.pop_back(); + } + } + + str += "..."; + + std::swap(origStr, str); +}