m87: merge fixes for native tts

This is a rebase of two trunk changes that could not be merged
automatically through the Gerrit UI:
https://chromium-review.googlesource.com/c/chromium/src/+/2464067
and
https://chromium-review.googlesource.com/c/chromium/src/+/2465564

Bug: 1134289
Change-Id: I65cffdd85cd855707b51b30ff6de2c3464487579
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2466399
Reviewed-by: David Tseng <[email protected]>
Commit-Queue: David Tseng <[email protected]>
Cr-Commit-Position: refs/branch-heads/4280@{#249}
Cr-Branched-From: ea420fb963f9658c9969b6513c56b8f47efa1a2a-refs/heads/master@{#812852}
diff --git a/chromeos/services/tts/tts_service.cc b/chromeos/services/tts/tts_service.cc
index b6cd31a..cb97c6a 100644
--- a/chromeos/services/tts/tts_service.cc
+++ b/chromeos/services/tts/tts_service.cc
@@ -41,9 +41,7 @@
 // methods utilize C features only.
 
 TtsService::TtsService(mojo::PendingReceiver<mojom::TtsService> receiver)
-    : service_receiver_(this, std::move(receiver)),
-      stream_receiver_(this),
-      got_first_buffer_(false) {
+    : service_receiver_(this, std::move(receiver)), stream_receiver_(this) {
   if (setpriority(PRIO_PROCESS, 0, -10 /* real time audio */) != 0) {
     PLOG(ERROR) << "Unable to request real time priority; performance will be "
                    "impacted.";
@@ -62,7 +60,6 @@
 void TtsService::BindTtsStream(
     mojo::PendingReceiver<mojom::TtsStream> receiver,
     mojo::PendingRemote<audio::mojom::StreamFactory> factory) {
-  base::AutoLock al(state_lock_);
   stream_receiver_.Bind(std::move(receiver));
 
   // TODO(accessibility): The sample rate below can change based on the audio
@@ -79,8 +76,6 @@
 void TtsService::InstallVoice(const std::string& voice_name,
                               const std::vector<uint8_t>& voice_bytes,
                               InstallVoiceCallback callback) {
-  base::AutoLock al(state_lock_);
-
   // Create a directory to place extracted voice data.
   base::FilePath voice_data_path(kTempDataDirectory);
   voice_data_path = voice_data_path.Append(voice_name);
@@ -101,8 +96,6 @@
 
 void TtsService::SelectVoice(const std::string& voice_name,
                              SelectVoiceCallback callback) {
-  base::AutoLock al(state_lock_);
-
   base::FilePath path_prefix =
       base::FilePath(kTempDataDirectory).Append(voice_name);
   base::FilePath pipeline_path = path_prefix.Append("pipeline");
@@ -112,8 +105,6 @@
 
 void TtsService::Speak(const std::vector<uint8_t>& text_jspb,
                        SpeakCallback callback) {
-  base::AutoLock al(state_lock_);
-
   tts_event_observer_.reset();
   auto pending_receiver = tts_event_observer_.BindNewPipeAndPassReceiver();
   std::move(callback).Run(std::move(pending_receiver));
@@ -126,6 +117,12 @@
   }
 
   output_device_->Play();
+
+  is_playing_ = true;
+  base::ThreadTaskRunnerHandle::Get()->PostTask(
+      FROM_HERE,
+      base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
+                     true /* is_first_buffer */));
 }
 
 void TtsService::Stop() {
@@ -134,7 +131,6 @@
 }
 
 void TtsService::SetVolume(float volume) {
-  base::AutoLock al(state_lock_);
   output_device_->SetVolume(volume);
 }
 
@@ -142,61 +138,104 @@
                        base::TimeTicks delay_timestamp,
                        int prior_frames_skipped,
                        media::AudioBus* dest) {
-  // Careful to not block the render callback. Only try to acquire the lock
-  // here, but early return if we are processing a series of other calls. This
-  // can be extremely important if there's a long queue of pending Speak/Stop
-  // pairs being processed on the main thread. This can occur if the tts api
-  // receives lots of tts requests.
-  if (!state_lock_.Try()) {
-    return 0;
-  }
-
-  size_t frames = 0;
-  int32_t status =
-      libchrometts_.GoogleTtsReadBuffered(dest->channel(0), &frames);
-
-  if (status <= 0) {
-    // -1 means an error, 0 means done.
-    if (status == -1)
-      tts_event_observer_->OnError();
-
-    dest->Zero();
-    StopLocked();
-    state_lock_.Release();
-    return 0;
-  }
-
-  if (frames == 0) {
-    state_lock_.Release();
-    return 0;
-  }
-
-  if (!got_first_buffer_) {
-    got_first_buffer_ = true;
-    tts_event_observer_->OnStart();
-  }
-
-  // There's only really ever one timepoint since we play this buffer in one
-  // chunk.
+  size_t frames_in_buf = 0;
+  int32_t status = -1;
   int char_index = -1;
-  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
-    char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
+  bool is_first_buffer = false;
+  {
+    base::AutoLock al(state_lock_);
+    if (buffers_.empty())
+      return 0;
+
+    const AudioBuffer& buf = buffers_.front();
+
+    status = buf.status;
+
+    // Done, 0, or error, -1.
+    if (status <= 0) {
+      if (status == -1)
+        tts_event_observer_->OnError();
+      else
+        tts_event_observer_->OnEnd();
+
+      StopLocked();
+      return 0;
+    }
+
+    char_index = buf.char_index;
+    is_first_buffer = buf.is_first_buffer;
+    const float* frames = &buf.frames[0];
+    frames_in_buf = buf.frames.size();
+    float* channel = dest->channel(0);
+    for (size_t i = 0; i < frames_in_buf; i++)
+      channel[i] = frames[i];
+    buffers_.pop_front();
+  }
+
+  if (is_first_buffer)
+    tts_event_observer_->OnStart();
+
+  if (frames_in_buf == 0)
+    return 0;
 
   if (char_index != -1)
     tts_event_observer_->OnTimepoint(char_index);
 
-  state_lock_.Release();
-  return frames;
+  return frames_in_buf;
 }
 
 void TtsService::OnRenderError() {}
 
 void TtsService::StopLocked() {
+  if (!is_playing_)
+    return;
+
   output_device_->Pause();
-  libchrometts_.GoogleTtsFinalizeBuffered();
-  if (tts_event_observer_ && got_first_buffer_)
-    tts_event_observer_->OnEnd();
-  got_first_buffer_ = false;
+  buffers_.clear();
+  is_playing_ = false;
+}
+
+void TtsService::ReadMoreFrames(bool is_first_buffer) {
+  if (!is_playing_)
+    return;
+
+  AudioBuffer buf;
+  buf.frames.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());
+  size_t frames_in_buf = 0;
+  buf.status =
+      libchrometts_.GoogleTtsReadBuffered(&buf.frames[0], &frames_in_buf);
+
+  buf.frames.resize(frames_in_buf);
+
+  buf.char_index = -1;
+  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
+    buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
+
+  buf.is_first_buffer = is_first_buffer;
+
+  {
+    base::AutoLock al(state_lock_);
+    buffers_.emplace_back(std::move(buf));
+  }
+
+  if (buf.status <= 0)
+    return;
+
+  base::ThreadTaskRunnerHandle::Get()->PostTask(
+      FROM_HERE,
+      base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
+                     false /* is_first_buffer */));
+}
+
+TtsService::AudioBuffer::AudioBuffer() = default;
+
+TtsService::AudioBuffer::~AudioBuffer() = default;
+
+TtsService::AudioBuffer::AudioBuffer(TtsService::AudioBuffer&& other) {
+  frames.swap(other.frames);
+  status = other.status;
+  char_index = other.char_index;
+  is_first_buffer = other.is_first_buffer;
 }
 
 }  // namespace tts
diff --git a/chromeos/services/tts/tts_service.h b/chromeos/services/tts/tts_service.h
index 09fc3d4..061937b 100644
--- a/chromeos/services/tts/tts_service.h
+++ b/chromeos/services/tts/tts_service.h
@@ -54,6 +54,8 @@
   // Handles stopping tts.
   void StopLocked() EXCLUSIVE_LOCKS_REQUIRED(state_lock_);
 
+  void ReadMoreFrames(bool is_first_buffer);
+
   // Connection to tts in the browser.
   mojo::Receiver<mojom::TtsService> service_receiver_;
 
@@ -61,21 +63,35 @@
   base::Lock state_lock_;
 
   // Prebuilt.
-  LibChromeTtsLoader libchrometts_ GUARDED_BY(state_lock_);
+  LibChromeTtsLoader libchrometts_;
 
   // Connection to tts in the component extension.
-  mojo::Receiver<mojom::TtsStream> stream_receiver_ GUARDED_BY(state_lock_);
+  mojo::Receiver<mojom::TtsStream> stream_receiver_;
 
   // Connection to send tts events to component extension.
-  mojo::Remote<mojom::TtsEventObserver> tts_event_observer_
-      GUARDED_BY(state_lock_);
+  mojo::Remote<mojom::TtsEventObserver> tts_event_observer_;
 
   // Outputs speech synthesis to audio.
-  std::unique_ptr<audio::OutputDevice> output_device_ GUARDED_BY(state_lock_);
+  std::unique_ptr<audio::OutputDevice> output_device_;
 
-  // Tracks whether any audio data came as a result of |Speak|. Reset for every
-  // call to |Speak|.
-  bool got_first_buffer_ GUARDED_BY(state_lock_);
+  // Helper group of state to pass from main thread to audio thread.
+  struct AudioBuffer {
+    AudioBuffer();
+    ~AudioBuffer();
+    AudioBuffer(const AudioBuffer& other) = delete;
+    AudioBuffer(AudioBuffer&& other);
+
+    std::vector<float> frames;
+    int char_index;
+    int status;
+    bool is_first_buffer;
+  };
+
+  // The queue of audio buffers to be played by the audio thread.
+  std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_);
+
+  // Tracks whether the output device is playing audio.
+  bool is_playing_ = false;
 };
 
 }  // namespace tts