truncation

This commit is contained in:
Doug Hoyte
2024-12-19 21:43:35 -05:00
parent e65d50bb22
commit e0f0b9d1bf
2 changed files with 28 additions and 6 deletions

View File

@ -71,7 +71,7 @@ struct User {
} }
if (username.size() == 0) username = to_hex(pubkey.substr(0,4)); if (username.size() == 0) username = to_hex(pubkey.substr(0,4));
if (username.size() > 50) username = username.substr(0, 50) + "..."; abbrevText(username, 50);
} }
std::optional<tao::json::value> loadKindJson(lmdb::txn &txn, Decompressor &decomp, uint64_t kind) { std::optional<tao::json::value> loadKindJson(lmdb::txn &txn, Decompressor &decomp, uint64_t kind) {
@ -280,11 +280,7 @@ struct Event {
// If it was only a URL, just use raw URL // If it was only a URL, just use raw URL
if (content.size() == 0 || std::all_of(content.begin(), content.end(), [](unsigned char c){ return std::isspace(c); })) content = firstUrl; if (content.size() == 0 || std::all_of(content.begin(), content.end(), [](unsigned char c){ return std::isspace(c); })) content = firstUrl;
auto textAbbrev = [](std::string &str, size_t maxLen){ abbrevText(content, 100);
if (str.size() > maxLen) str = str.substr(0, maxLen-3) + "...";
};
textAbbrev(content, 100);
templarInternal::htmlEscape(content, true); templarInternal::htmlEscape(content, true);
output.text = std::move(content); output.text = std::move(content);

View File

@ -110,3 +110,29 @@ inline std::string stripUrls(std::string &content) {
std::swap(output, content); std::swap(output, content);
return firstUrl; return firstUrl;
} }
// Abbreviates origStr in place so that it is at most maxLen bytes long.
//
// Strings of maxLen bytes or fewer are left untouched. Longer strings are cut
// at the last UTF-8 code-point boundary at or before byte offset maxLen-3 and
// have "..." appended, so the result never exceeds maxLen bytes.
//
// For well-formed UTF-8 input the result never ends in a partially encoded
// code-point. Truncation is code-point based, not grapheme based, so a base
// character can still be separated from combining marks that followed it.
// See: https://metacpan.org/pod/Unicode::Truncate
//
// Throws (via herr) if maxLen < 10.
inline void abbrevText(std::string &origStr, size_t maxLen) {
    if (maxLen < 10) throw herr("abbrev too short");
    if (origStr.size() <= maxLen) return;

    // UTF-8 continuation bytes have the bit pattern 10xxxxxx. The cast keeps
    // the mask test independent of whether plain char is signed.
    auto isUtf8Continuation = [](char c){
        return (static_cast<unsigned char>(c) & 0b1100'0000) == 0b1000'0000;
    };

    // origStr.size() > maxLen > cut, so origStr[cut] is always a valid index.
    size_t cut = maxLen - 3;

    // Look at the first byte that would be dropped, not the last byte kept:
    // if it is a continuation byte the cut lands inside a code-point, so back
    // up to that code-point's lead byte and drop the whole thing. This also
    // covers a cut directly after a lead byte, and it keeps a trailing
    // code-point that is already complete.
    while (cut > 0 && isUtf8Continuation(origStr[cut])) cut--;

    origStr.resize(cut);
    origStr += "...";
}