[M87 Merge] Quick Answers: Filter invalid query text for current model

The text annotation model we currently use does not handle concatenated
words like "SampleClassName" or words containing special characters
like "test_example(" well. Since it will take time for us to roll out
the v2 model, filter out such queries for now to improve the
quality.

(cherry picked from commit 7c206ad4f5d177ec931752a73bb1bb5ab3c302c3)

Bug: b/159664194
Test: Manual Test
Change-Id: I203388766b160fa7f3368702e1c75683a3e148ec
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2440832
Commit-Queue: Yue Li <[email protected]>
Reviewed-by: Xiyuan Xia <[email protected]>
Cr-Original-Commit-Position: refs/heads/master@{#812947}
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2472911
Reviewed-by: Yue Li <[email protected]>
Cr-Commit-Position: refs/branch-heads/4280@{#393}
Cr-Branched-From: ea420fb963f9658c9969b6513c56b8f47efa1a2a-refs/heads/master@{#812852}
diff --git a/chromeos/components/quick_answers/understanding/intent_generator.cc b/chromeos/components/quick_answers/understanding/intent_generator.cc
index 0ff68d3..319e50f1 100644
--- a/chromeos/components/quick_answers/understanding/intent_generator.cc
+++ b/chromeos/components/quick_answers/understanding/intent_generator.cc
@@ -29,6 +29,11 @@
 constexpr int kTranslationTextLengthThreshold = 50;
 constexpr int kDefinitionIntentAndSelectionLengthDiffThreshold = 2;
 
+// TODO(b/169370175): Remove this temporary invalid-character set after we
+// ramp up to the v2 model.
+// Characters that make selected text ineligible for definition annotation.
+constexpr char kInvalidCharactersSet[] = "()[]{}<>_&|!";
+
 const std::map<std::string, IntentType>& GetIntentTypeMap() {
   static base::NoDestructor<std::map<std::string, IntentType>> kIntentTypeMap(
       {{"unit", IntentType::kUnit}, {"dictionary", IntentType::kDictionary}});
@@ -81,6 +86,28 @@
   return intent;
 }
 
+// TODO(b/169370175): The current text classifier annotates concatenated
+// words (e.g. "SampleClassName") as definitions. Until we switch to the v2
+// model, returns true if |text| should be skipped for definition annotation.
+bool ShouldSkipDefinition(const std::string& text) {
+  // Guard explicitly instead of DCHECKing: with DCHECKs disabled, substr(1)
+  // on empty text would be out of range. Empty text is never skipped.
+  if (text.empty())
+    return false;
+  // Skip the query if the selected text has capitalized characters after the
+  // first one (mixed case), unless the whole text is capitalized.
+  const auto text_utf16 = base::UTF8ToUTF16(text);
+  const auto tail = text_utf16.substr(1);
+  const bool has_capitalized_middle_characters =
+      tail != base::i18n::ToLower(tail);
+  const bool are_all_characters_capitalized =
+      text_utf16 == base::i18n::ToUpper(text_utf16);
+  if (has_capitalized_middle_characters && !are_all_characters_capitalized)
+    return true;
+  // Otherwise skip only if the selected text contains an invalid character.
+  return text.find_first_of(kInvalidCharactersSet) != std::string::npos;
+}
+
 }  // namespace
 
 IntentGenerator::IntentGenerator(IntentGeneratorCallback complete_callback)
@@ -145,6 +172,13 @@
     auto intent_type_map = GetIntentTypeMap();
     auto it = intent_type_map.find(type);
     if (it != intent_type_map.end()) {
+      // Skip the entity for definition annotation.
+      if (it->second == IntentType::kDictionary &&
+          ShouldSkipDefinition(request.selected_text)) {
+        // Fallback to language detection for generating translation intent.
+        MaybeGenerateTranslationIntent(request);
+        return;
+      }
       std::move(complete_callback_)
           .Run(IntentInfo(entity_str, RewriteIntent(request.selected_text,
                                                     entity_str, it->second)));