mirror of
git://jb55.com/damus
synced 2024-09-29 00:10:43 +00:00
translation: add workaround to reduce wasteful translation requests
Signed-off-by: Terry Yiu <git@tyiu.xyz> Reviewed-by: William Casarin <jb55@jb55.com> Signed-off-by: William Casarin <jb55@jb55.com> Changelog-Fixed: Add workaround to fix note language recognition and reduce wasteful translation requests
This commit is contained in:
parent
eb41846bb9
commit
bf78c0a3a0
@ -23,6 +23,11 @@ public struct Translator {
|
||||
}
|
||||
|
||||
public func translate(_ text: String, from sourceLanguage: String, to targetLanguage: String) async throws -> String? {
|
||||
// Do not attempt to translate if the source and target languages are the same.
|
||||
guard sourceLanguage != targetLanguage else {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch userSettingsStore.translation_service {
|
||||
case .purple:
|
||||
return try await translateWithPurple(text, from: sourceLanguage, to: targetLanguage)
|
||||
@ -35,7 +40,7 @@ public struct Translator {
|
||||
case .deepl:
|
||||
return try await translateWithDeepL(text, from: sourceLanguage, to: targetLanguage)
|
||||
case .none:
|
||||
return text
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -411,7 +411,25 @@ extension NdbNote {
|
||||
let originalBlocks = self.blocks(keypair).blocks
|
||||
let originalOnlyText = originalBlocks.compactMap {
|
||||
if case .text(let txt) = $0 {
|
||||
return txt
|
||||
// Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
|
||||
// as a workaround to get Apple's language recognizer to predict language the correctly.
|
||||
// It is important to add this workaround to get the language right because it wastes users' money to send translation requests.
|
||||
// Until Apple fixes their language model, this workaround will be kept in place.
|
||||
// See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
|
||||
//
|
||||
// For example,
|
||||
// "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
|
||||
// has the note content "It’s a meme".
|
||||
// Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
|
||||
// which is a wildly incorrect hypothesis.
|
||||
// With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
|
||||
//
|
||||
// Similarly,
|
||||
// "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
|
||||
// has the note content "You’re funner".
|
||||
// Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
|
||||
// With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
|
||||
return txt.replacingOccurrences(of: "’", with: "'")
|
||||
}
|
||||
else {
|
||||
return nil
|
||||
@ -419,13 +437,17 @@ extension NdbNote {
|
||||
}
|
||||
.joined(separator: " ")
|
||||
|
||||
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
|
||||
// If there is no text, there's nothing to use to detect language.
|
||||
guard !originalOnlyText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
|
||||
return nil
|
||||
}
|
||||
|
||||
let languageRecognizer = NLLanguageRecognizer()
|
||||
languageRecognizer.processString(originalOnlyText)
|
||||
|
||||
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
|
||||
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
|
||||
let nstr: String? = nil
|
||||
return nstr
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
|
||||
|
Loading…
Reference in New Issue
Block a user