[M87 Merge] Quick Answers: Filter invalid query text for current model
The text annotation model we currently use does not handle concatenated
words such as "SampleClassName" or words containing special characters
such as "test_example(" well. Since it will take time for us to roll up
to the v2 model, filter out such queries for now to improve the
quality.
(cherry picked from commit 7c206ad4f5d177ec931752a73bb1bb5ab3c302c3)
Bug: b/159664194
Test: Manual Test
Change-Id: I203388766b160fa7f3368702e1c75683a3e148ec
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2440832
Commit-Queue: Yue Li <[email protected]>
Reviewed-by: Xiyuan Xia <[email protected]>
Cr-Original-Commit-Position: refs/heads/master@{#812947}
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2472911
Reviewed-by: Yue Li <[email protected]>
Cr-Commit-Position: refs/branch-heads/4280@{#393}
Cr-Branched-From: ea420fb963f9658c9969b6513c56b8f47efa1a2a-refs/heads/master@{#812852}
diff --git a/chromeos/components/quick_answers/understanding/intent_generator.cc b/chromeos/components/quick_answers/understanding/intent_generator.cc
index 0ff68d3..319e50f1 100644
--- a/chromeos/components/quick_answers/understanding/intent_generator.cc
+++ b/chromeos/components/quick_answers/understanding/intent_generator.cc
@@ -29,6 +29,11 @@
constexpr int kTranslationTextLengthThreshold = 50;
constexpr int kDefinitionIntentAndSelectionLengthDiffThreshold = 2;
+// TODO(b/169370175): Remove the temporary invalid set after we ramp up to v2
+// model.
+// Set of invalid characters for definition annotations. Selected text that
+// contains any of these characters is skipped for definition annotation.
+constexpr char kInvalidCharactersSet[] = "()[]{}<>_&|!";
+
const std::map<std::string, IntentType>& GetIntentTypeMap() {
static base::NoDestructor<std::map<std::string, IntentType>> kIntentTypeMap(
{{"unit", IntentType::kUnit}, {"dictionary", IntentType::kDictionary}});
@@ -81,6 +86,28 @@
return intent;
}
+// TODO(b/169370175): There is an issue with the text classifier where
+// concatenated words (e.g. "SampleClassName") and text containing special
+// characters (e.g. "test_example(") are annotated as definitions. Until we
+// switch to the v2 model, skip definition annotation for such queries.
+//
+// Returns true if |text| should not be treated as a definition query, i.e. it
+// is empty, contains a character from |kInvalidCharactersSet|, or has a
+// capitalized character after the first one without being all capitalized.
+bool ShouldSkipDefinition(const std::string& text) {
+  // There is nothing to define in an empty selection. Returning early also
+  // keeps the substr(1) call below within range.
+  if (text.empty())
+    return true;
+
+  // Look for invalid characters first: this is a plain scan over |text| and
+  // lets us bail out before paying for the UTF-16 and case conversions.
+  if (text.find_first_of(kInvalidCharactersSet) != std::string::npos)
+    return true;
+
+  const auto text_utf16 = base::UTF8ToUTF16(text);
+  // Defensive: substr(1) operates on the converted string, so make sure it has
+  // at least one character as well.
+  if (text_utf16.empty())
+    return true;
+
+  // Skip the query for definition annotation if the selected text contains
+  // capitalized characters in the middle and is not all capitalized, e.g.
+  // "SampleClassName" is skipped while "NASA" and "Hello" are not.
+  const auto tail_utf16 = text_utf16.substr(1);
+  const bool has_capitalized_middle_characters =
+      tail_utf16 != base::i18n::ToLower(tail_utf16);
+  const bool are_all_characters_capitalized =
+      text_utf16 == base::i18n::ToUpper(text_utf16);
+  return has_capitalized_middle_characters && !are_all_characters_capitalized;
+}
+
} // namespace
IntentGenerator::IntentGenerator(IntentGeneratorCallback complete_callback)
@@ -145,6 +172,13 @@
auto intent_type_map = GetIntentTypeMap();
auto it = intent_type_map.find(type);
if (it != intent_type_map.end()) {
+ // Skip the entity for definition annotation.
+ if (it->second == IntentType::kDictionary &&
+ ShouldSkipDefinition(request.selected_text)) {
+ // Fallback to language detection for generating translation intent.
+ MaybeGenerateTranslationIntent(request);
+ return;
+ }
std::move(complete_callback_)
.Run(IntentInfo(entity_str, RewriteIntent(request.selected_text,
entity_str, it->second)));