1
0
mirror of git://jb55.com/damus synced 2024-09-29 00:10:43 +00:00

translation: add workaround to reduce wasteful translation requests

Signed-off-by: Terry Yiu <git@tyiu.xyz>
Reviewed-by: William Casarin <jb55@jb55.com>
Signed-off-by: William Casarin <jb55@jb55.com>
Changelog-Fixed: Add workaround to fix note language recognition and reduce wasteful translation requests
This commit is contained in:
Terry Yiu 2024-01-07 14:07:09 -05:00 committed by William Casarin
parent eb41846bb9
commit bf78c0a3a0
2 changed files with 32 additions and 5 deletions

View File

@ -23,6 +23,11 @@ public struct Translator {
}
public func translate(_ text: String, from sourceLanguage: String, to targetLanguage: String) async throws -> String? {
// Do not attempt to translate if the source and target languages are the same.
guard sourceLanguage != targetLanguage else {
return nil
}
switch userSettingsStore.translation_service {
case .purple:
return try await translateWithPurple(text, from: sourceLanguage, to: targetLanguage)
@ -35,7 +40,7 @@ public struct Translator {
case .deepl:
return try await translateWithDeepL(text, from: sourceLanguage, to: targetLanguage)
case .none:
return text
return nil
}
}

View File

@ -411,7 +411,25 @@ extension NdbNote {
let originalBlocks = self.blocks(keypair).blocks
let originalOnlyText = originalBlocks.compactMap {
if case .text(let txt) = $0 {
return txt
// Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
// as a workaround to get Apple's language recognizer to predict the language correctly.
// It is important to get the language right because sending unnecessary translation requests wastes users' money.
// Until Apple fixes their language model, this workaround will be kept in place.
// See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
//
// For example,
// "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
// has the note content "It’s a meme".
// Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
// which is a wildly incorrect hypothesis.
// With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
//
// Similarly,
// "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
// has the note content "You’re funner".
// Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
// With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
return txt.replacingOccurrences(of: "", with: "'")
}
else {
return nil
@ -419,13 +437,17 @@ extension NdbNote {
}
.joined(separator: " ")
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
// If there is no text, there's nothing to use to detect language.
guard !originalOnlyText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
return nil
}
let languageRecognizer = NLLanguageRecognizer()
languageRecognizer.processString(originalOnlyText)
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
let nstr: String? = nil
return nstr
return nil
}
// Remove the variant component and just take the language part, as translation services typically only support the variant-less language.