diff --git a/.kokoro/continuous/doctest.cfg b/.kokoro/continuous/doctest.cfg new file mode 100644 index 0000000000..dfdc78782f --- /dev/null +++ b/.kokoro/continuous/doctest.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "doctest" +} + +env_vars: { + key: "GOOGLE_CLOUD_PROJECT" + value: "bigframes-load-testing" +} + +env_vars: { + key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT" + value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048" +} diff --git a/.kokoro/continuous/e2e.cfg b/.kokoro/continuous/e2e.cfg index 3dbd0b47f0..e049dd30b3 100644 --- a/.kokoro/continuous/e2e.cfg +++ b/.kokoro/continuous/e2e.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "e2e doctest notebook unit_prerelease system_prerelease system_noextras" + value: "e2e unit_prerelease system_prerelease system_noextras" } env_vars: { diff --git a/.kokoro/continuous/notebook.cfg b/.kokoro/continuous/notebook.cfg new file mode 100644 index 0000000000..94e2a3c686 --- /dev/null +++ b/.kokoro/continuous/notebook.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "notebook" +} + +env_vars: { + key: "GOOGLE_CLOUD_PROJECT" + value: "bigframes-load-testing" +} + +env_vars: { + key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT" + value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048" +} diff --git a/.kokoro/load/benchmark.cfg b/.kokoro/load/benchmark.cfg new file mode 100644 index 0000000000..a489e05bbc --- /dev/null +++ b/.kokoro/load/benchmark.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "benchmark" +} + +env_vars: { + key: "GOOGLE_CLOUD_PROJECT" + value: "bigframes-load-testing" +} + +env_vars: { + key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT" + value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048" +} diff --git a/.kokoro/presubmit/doctest.cfg b/.kokoro/presubmit/doctest.cfg new file mode 100644 index 0000000000..dfdc78782f --- /dev/null +++ b/.kokoro/presubmit/doctest.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "doctest" +} + +env_vars: { + key: "GOOGLE_CLOUD_PROJECT" + value: "bigframes-load-testing" +} + +env_vars: { + key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT" + value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048" +} diff --git a/.kokoro/presubmit/e2e.cfg b/.kokoro/presubmit/e2e.cfg index 3dbd0b47f0..e049dd30b3 100644 --- a/.kokoro/presubmit/e2e.cfg +++ b/.kokoro/presubmit/e2e.cfg @@ -3,7 +3,7 @@ # Only run this nox session. 
env_vars: { key: "NOX_SESSION" - value: "e2e doctest notebook unit_prerelease system_prerelease system_noextras" + value: "e2e unit_prerelease system_prerelease system_noextras" } env_vars: { diff --git a/.kokoro/presubmit/notebook.cfg b/.kokoro/presubmit/notebook.cfg new file mode 100644 index 0000000000..94e2a3c686 --- /dev/null +++ b/.kokoro/presubmit/notebook.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "notebook" +} + +env_vars: { + key: "GOOGLE_CLOUD_PROJECT" + value: "bigframes-load-testing" +} + +env_vars: { + key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT" + value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048" +} diff --git a/CHANGELOG.md b/CHANGELOG.md index cad061ce05..d585b5b1c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/bigframes/#history +## [1.9.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.8.0...v1.9.0) (2024-06-10) + + +### Features + +* Allow functions returned from `bpd.read_gbq_function` to execute outside of `apply` ([#706](https://github.com/googleapis/python-bigquery-dataframes/issues/706)) ([ad7d8ac](https://github.com/googleapis/python-bigquery-dataframes/commit/ad7d8ac1247ec3b9532dd5375265c36907f50da2)) +* Support `bigquery.vector_search()` ([#736](https://github.com/googleapis/python-bigquery-dataframes/issues/736)) ([dad66fd](https://github.com/googleapis/python-bigquery-dataframes/commit/dad66fdd22bb2d507e7f366c970d971554598cf3)) +* Support `score()` in GeminiTextGenerator ([#740](https://github.com/googleapis/python-bigquery-dataframes/issues/740)) ([b2c7d8b](https://github.com/googleapis/python-bigquery-dataframes/commit/b2c7d8b28e235c839370818137fba71796c9f02a)) +* Support bytes type in `remote_function` ([#761](https://github.com/googleapis/python-bigquery-dataframes/issues/761)) ([4915424](https://github.com/googleapis/python-bigquery-dataframes/commit/4915424a68f36542e901a0ac27946f1ecb2d05ab)) +* Support fit() in GeminiTextGenerator ([#758](https://github.com/googleapis/python-bigquery-dataframes/issues/758)) ([d751f5c](https://github.com/googleapis/python-bigquery-dataframes/commit/d751f5cd1cf578618eabbb992cfb6b0a3c36608c)) + + +### Bug Fixes + +* ARIMAPlus loads auto_arima_min_order param ([#752](https://github.com/googleapis/python-bigquery-dataframes/issues/752)) ([39d7013](https://github.com/googleapis/python-bigquery-dataframes/commit/39d7013a8a8d2908f20bfe54a7dc8de166323b90)) +* Improve to_pandas_batches for large results ([#746](https://github.com/googleapis/python-bigquery-dataframes/issues/746)) ([61f18cb](https://github.com/googleapis/python-bigquery-dataframes/commit/61f18cb63f2785c03dc612a34c030079fc8f4172)) +* Resolve issue with unset thread-local options ([#741](https://github.com/googleapis/python-bigquery-dataframes/issues/741)) 
([d93dbaf](https://github.com/googleapis/python-bigquery-dataframes/commit/d93dbafe2bb405c60f7141d9ae4135db4ffdb702)) + + +### Documentation + +* Fix ML.EVALUATE spelling ([#749](https://github.com/googleapis/python-bigquery-dataframes/issues/749)) ([7899749](https://github.com/googleapis/python-bigquery-dataframes/commit/7899749505a75ed89c68e9df64124a153644de96)) +* Remove LogisticRegression normal_equation strategy ([#753](https://github.com/googleapis/python-bigquery-dataframes/issues/753)) ([ea5d367](https://github.com/googleapis/python-bigquery-dataframes/commit/ea5d367d5ecc6826d30082e75c957af8362c9e61)) + ## [1.8.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.7.0...v1.8.0) (2024-05-31) diff --git a/bigframes/_config/__init__.py b/bigframes/_config/__init__.py index 4729532e98..c9b2a3f95a 100644 --- a/bigframes/_config/__init__.py +++ b/bigframes/_config/__init__.py @@ -17,8 +17,12 @@ DataFrames from this package. """ +from __future__ import annotations + import copy +from dataclasses import dataclass, field import threading +from typing import Optional import bigframes_vendored.pandas._config.config as pandas_config @@ -28,18 +32,27 @@ import bigframes._config.sampling_options as sampling_options +@dataclass +class ThreadLocalConfig(threading.local): + # If unset, global settings will be used + bigquery_options: Optional[bigquery_options.BigQueryOptions] = None + # Note: use default factory instead of default instance so each thread initializes to default values + display_options: display_options.DisplayOptions = field( + default_factory=display_options.DisplayOptions + ) + sampling_options: sampling_options.SamplingOptions = field( + default_factory=sampling_options.SamplingOptions + ) + compute_options: compute_options.ComputeOptions = field( + default_factory=compute_options.ComputeOptions + ) + + class Options: """Global options affecting BigQuery DataFrames behavior.""" def __init__(self): - self._local = threading.local() - - # Initialize these in the property getters to make sure we do have a - # separate instance per thread. - self._local.bigquery_options = None - self._local.display_options = None - self._local.sampling_options = None - self._local.compute_options = None + self._local = ThreadLocalConfig() # BigQuery options are special because they can only be set once per # session, so we need an indicator as to whether we are using the @@ -61,21 +74,16 @@ def _init_bigquery_thread_local(self): @property def bigquery(self) -> bigquery_options.BigQueryOptions: """Options to use with the BigQuery engine.""" - if ( - bigquery_options := getattr(self._local, "bigquery_options", None) - ) is not None: + if self._local.bigquery_options is not None: # The only way we can get here is if someone called # _init_bigquery_thread_local. - return bigquery_options + return self._local.bigquery_options return self._bigquery_options @property def display(self) -> display_options.DisplayOptions: """Options controlling object representation.""" - if self._local.display_options is None: - self._local.display_options = display_options.DisplayOptions() - return self._local.display_options @property @@ -88,17 +96,11 @@ def sampling(self) -> sampling_options.SamplingOptions: matplotlib plotting). This option can be overriden by parameters in specific functions. 
""" - if self._local.sampling_options is None: - self._local.sampling_options = sampling_options.SamplingOptions() - return self._local.sampling_options @property def compute(self) -> compute_options.ComputeOptions: """Thread-local options controlling object computation.""" - if self._local.compute_options is None: - self._local.compute_options = compute_options.ComputeOptions() - return self._local.compute_options @property diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 5808aa28bf..85a9010a7d 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -21,11 +21,15 @@ from __future__ import annotations import typing +from typing import Literal, Optional, Union import bigframes.constants as constants import bigframes.core.groupby as groupby +import bigframes.core.sql +import bigframes.ml.utils as utils import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops +import bigframes.series if typing.TYPE_CHECKING: import bigframes.dataframe as dataframe @@ -148,3 +152,153 @@ def array_to_string(series: series.Series, delimiter: str) -> series.Series: """ return series._apply_unary_op(ops.ArrayToStringOp(delimiter=delimiter)) + + +def vector_search( + base_table: str, + column_to_search: str, + query: Union[dataframe.DataFrame, series.Series], + *, + query_column_to_search: Optional[str] = None, + top_k: Optional[int] = 10, + distance_type: Literal["euclidean", "cosine"] = "euclidean", + fraction_lists_to_search: Optional[float] = None, + use_brute_force: bool = False, +) -> dataframe.DataFrame: + """ + Conduct vector search which searches embeddings to find semantically similar entities. + + **Examples:** + + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + DataFrame embeddings for which to find nearest neighbors. The ``ARRAY`` column + is used as the search query: + + >>> search_query = bpd.DataFrame({"query_id": ["dog", "cat"], + ... "embedding": [[1.0, 2.0], [3.0, 5.2]]}) + >>> bbq.vector_search( + ... base_table="bigframes-dev.bigframes_tests_sys.base_table", + ... column_to_search="my_embedding", + ... query=search_query, + ... top_k=2) + query_id embedding id my_embedding distance + 1 cat [3. 5.2] 5 [5. 5.4] 2.009975 + 0 dog [1. 2.] 1 [1. 2.] 0.0 + 0 dog [1. 2.] 4 [1. 3.2] 1.2 + 1 cat [3. 5.2] 2 [2. 4.] 1.56205 + + [4 rows x 5 columns] + + Series embeddings for which to find nearest neighbors: + + >>> search_query = bpd.Series([[1.0, 2.0], [3.0, 5.2]], + ... index=["dog", "cat"], + ... name="embedding") + >>> bbq.vector_search( + ... base_table="bigframes-dev.bigframes_tests_sys.base_table", + ... column_to_search="my_embedding", + ... query=search_query, + ... top_k=2) + embedding id my_embedding distance + dog [1. 2.] 1 [1. 2.] 0.0 + cat [3. 5.2] 5 [5. 5.4] 2.009975 + dog [1. 2.] 4 [1. 3.2] 1.2 + cat [3. 5.2] 2 [2. 4.] 1.56205 + + [4 rows x 4 columns] + + You can specify the name of the column in the query DataFrame embeddings and distance type. + If you specify query_column_to_search_value, it will use the provided column which contains + the embeddings for which to find nearest neighbors. Otherwiese, it uses the column_to_search value. + + >>> search_query = bpd.DataFrame({"query_id": ["dog", "cat"], + ... "embedding": [[1.0, 2.0], [3.0, 5.2]], + ... "another_embedding": [[0.7, 2.2], [3.3, 5.2]]}) + >>> bbq.vector_search( + ... base_table="bigframes-dev.bigframes_tests_sys.base_table", + ... 
column_to_search="my_embedding", + ... query=search_query, + ... distance_type="cosine", + ... query_column_to_search="another_embedding", + ... top_k=2) + query_id embedding another_embedding id my_embedding distance + 1 cat [3. 5.2] [3.3 5.2] 2 [2. 4.] 0.005181 + 0 dog [1. 2.] [0.7 2.2] 4 [1. 3.2] 0.000013 + 1 cat [3. 5.2] [3.3 5.2] 1 [1. 2.] 0.005181 + 0 dog [1. 2.] [0.7 2.2] 3 [1.5 7. ] 0.004697 + + [4 rows x 6 columns] + + Args: + base_table (str): + The table to search for nearest neighbor embeddings. + column_to_search (str): + The name of the base table column to search for nearest neighbor embeddings. + The column must have a type of ``ARRAY``. All elements in the array must be non-NULL. + query (bigframes.dataframe.DataFrame | bigframes.dataframe.Series): + A Series or DataFrame that provides the embeddings for which to find nearest neighbors. + query_column_to_search (str): + Specifies the name of the column in the query that contains the embeddings for which to + find nearest neighbors. The column must have a type of ``ARRAY``. All elements in + the array must be non-NULL and all values in the column must have the same array dimensions + as the values in the ``column_to_search`` column. Can only be set when query is a DataFrame. + top_k (int, default 10): + Sepecifies the number of nearest neighbors to return. Default to 10. + distance_type (str, defalt "euclidean"): + Specifies the type of metric to use to compute the distance between two vectors. + Possible values are "euclidean" and "cosine". Default to "euclidean". + fraction_lists_to_search (float, range in [0.0, 1.0]): + Specifies the percentage of lists to search. Specifying a higher percentage leads to + higher recall and slower performance, and the converse is true when specifying a lower + percentage. It is only used when a vector index is also used. You can only specify + ``fraction_lists_to_search`` when ``use_brute_force`` is set to False. + use_brute_force (bool, default False): + Determines whether to use brute force search by skipping the vector index if one is available. + Default to False. + + Returns: + bigframes.dataframe.DataFrame: A DataFrame containing vector search result. + """ + if not fraction_lists_to_search and use_brute_force is True: + raise ValueError( + "You can't specify fraction_lists_to_search when use_brute_force is set to True." + ) + if ( + isinstance(query, bigframes.series.Series) + and query_column_to_search is not None + ): + raise ValueError( + "You can't specify query_column_to_search when query is a Series." + ) + # TODO(ashleyxu): Support options in vector search. b/344019989 + if fraction_lists_to_search is not None or use_brute_force is True: + raise NotImplementedError( + f"fraction_lists_to_search and use_brute_force is not supported. 
{constants.FEEDBACK_LINK}" + ) + options = { + "base_table": base_table, + "column_to_search": column_to_search, + "query_column_to_search": query_column_to_search, + "distance_type": distance_type, + "top_k": top_k, + "fraction_lists_to_search": fraction_lists_to_search, + "use_brute_force": use_brute_force, + } + + (query,) = utils.convert_to_dataframe(query) + sql_string, index_col_ids, index_labels = query._to_sql_query(include_index=True) + + sql = bigframes.core.sql.create_vector_search_sql( + sql_string=sql_string, options=options # type: ignore + ) + if index_col_ids is not None: + df = query._session.read_gbq(sql, index_col=index_col_ids) + else: + df = query._session.read_gbq(sql) + df.index.names = index_labels + + return df diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 133d271fed..e0b63b4a8c 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -60,6 +60,7 @@ class ArrayValue: node: nodes.BigFrameNode + # DO NOT use, on deprecation path @classmethod def from_ibis( cls, @@ -69,11 +70,13 @@ def from_ibis( hidden_ordering_columns: Sequence[ibis_types.Value], ordering: orderings.ExpressionOrdering, ): + import bigframes.core.compile.ibis_types + node = nodes.ReadGbqNode( table=table, table_session=session, columns=tuple( - bigframes.dtypes.ibis_value_to_canonical_type(column) + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(column) for column in columns ), hidden_ordering_columns=tuple(hidden_ordering_columns), @@ -95,6 +98,23 @@ def from_pyarrow(cls, arrow_table: pa.Table, session: Session): ) return cls(node) + @classmethod + def from_cached( + cls, + original: ArrayValue, + table: google.cloud.bigquery.Table, + ordering: orderings.ExpressionOrdering, + ): + node = nodes.CachedTableNode( + original_node=original.node, + project_id=table.reference.project, + dataset_id=table.reference.dataset_id, + table_id=table.reference.table_id, + physical_schema=tuple(table.schema), + ordering=ordering, + ) + return cls(node) + @classmethod def from_table( cls, @@ -105,7 +125,10 @@ def from_table( predicate: Optional[str] = None, at_time: Optional[datetime.datetime] = None, primary_key: Sequence[str] = (), + offsets_col: Optional[str] = None, ): + if offsets_col and primary_key: + raise ValueError("must set at most one of 'offests', 'primary_key'") if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names): warnings.warn( "Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.", @@ -116,7 +139,8 @@ def from_table( dataset_id=table.reference.dataset_id, table_id=table.reference.table_id, physical_schema=tuple(table.schema), - total_order_cols=tuple(primary_key), + total_order_cols=(offsets_col,) if offsets_col else tuple(primary_key), + order_col_is_sequential=(offsets_col is not None), columns=schema, at_time=at_time, table_session=session, @@ -150,6 +174,24 @@ def _compiled_schema(self) -> schemata.ArraySchema: ) return schemata.ArraySchema(items) + def as_cached( + self: ArrayValue, + cache_table: google.cloud.bigquery.Table, + ordering: Optional[orderings.ExpressionOrdering], + ) -> ArrayValue: + """ + Replace the node with an equivalent one that references a tabel where the value has been materialized to. 
+ """ + node = nodes.CachedTableNode( + original_node=self.node, + project_id=cache_table.reference.project, + dataset_id=cache_table.reference.dataset_id, + table_id=cache_table.reference.table_id, + physical_schema=tuple(cache_table.schema), + ordering=ordering, + ) + return ArrayValue(node) + def _try_evaluate_local(self): """Use only for unit testing paths - not fully featured. Will throw exception if fails.""" import ibis @@ -192,6 +234,8 @@ def promote_offsets(self, col_id: str) -> ArrayValue: """ Convenience function to promote copy of column offsets to a value column. Can be used to reset index. """ + if not self.session._strictly_ordered: + raise ValueError("Generating offsets not supported in unordered mode") return ArrayValue(nodes.PromoteOffsetsNode(child=self.node, col_id=col_id)) def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue: @@ -340,6 +384,10 @@ def project_window_op( never_skip_nulls: will disable null skipping for operators that would otherwise do so skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection """ + if not self.session._strictly_ordered: + # TODO: Support unbounded windows with aggregate ops and some row-order-independent analytic ops + # TODO: Support non-deterministic windowing + raise ValueError("Windowed ops not supported in unordered mode") return ArrayValue( nodes.WindowOpNode( child=self.node, @@ -391,8 +439,9 @@ def unpivot( """ # There will be N labels, used to disambiguate which of N source columns produced each output row explode_offsets_id = bigframes.core.guid.generate_guid("unpivot_offsets_") - labels_array = self._create_unpivot_labels_array(row_labels, index_col_ids) - labels_array = labels_array.promote_offsets(explode_offsets_id) + labels_array = self._create_unpivot_labels_array( + row_labels, index_col_ids, explode_offsets_id + ) # Unpivot creates N output rows for each input row, labels disambiguate these N rows joined_array = self._cross_join_w_labels(labels_array, join_side) @@ -458,6 +507,7 @@ def _create_unpivot_labels_array( self, former_column_labels: typing.Sequence[typing.Hashable], col_ids: typing.Sequence[str], + offsets_id: str, ) -> ArrayValue: """Create an ArrayValue from a list of label tuples.""" rows = [] @@ -468,6 +518,7 @@ def _create_unpivot_labels_array( col_ids[i]: (row_label[i] if pandas.notnull(row_label[i]) else None) for i in range(len(col_ids)) } + row[offsets_id] = row_offset rows.append(row) return ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=self.session) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index ea063669d5..301bcc20e9 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -508,11 +508,24 @@ def try_peek( else: return None - def to_pandas_batches(self): - """Download results one message at a time.""" + def to_pandas_batches( + self, page_size: Optional[int] = None, max_results: Optional[int] = None + ): + """Download results one message at a time. 
+ + page_size and max_results determine the size and number of batches, + see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result""" dtypes = dict(zip(self.index_columns, self.index.dtypes)) dtypes.update(zip(self.value_columns, self.dtypes)) - results_iterator, _ = self.session._execute(self.expr, sorted=True) + _, query_job = self.session._query_to_destination( + self.session._to_sql(self.expr, sorted=True), + list(self.index_columns), + api_name="cached", + do_clustering=False, + ) + results_iterator = query_job.result( + page_size=page_size, max_results=max_results + ) for arrow_table in results_iterator.to_arrow_iterable( bqstorage_client=self.session.bqstoragereadclient ): @@ -540,7 +553,7 @@ def _materialize_local( """Run query and download results as a pandas DataFrame. Return the total number of results as well.""" # TODO(swast): Allow for dry run and timeout. _, query_job = self.session._query_to_destination( - self.session._to_sql(self.expr, sorted=True), + self.session._to_sql(self.expr, sorted=materialize_options.ordered), list(self.index_columns), api_name="cached", do_clustering=False, @@ -1003,7 +1016,7 @@ def aggregate_all_and_stack( index_columns=[index_id], column_labels=self.column_labels, index_labels=[None], - ).transpose(original_row_index=pd.Index([None])) + ).transpose(original_row_index=pd.Index([None]), single_row_mode=True) else: # axis_n == 1 # using offsets as identity to group on. # TODO: Allow to promote identity/total_order columns instead for better perf @@ -1646,6 +1659,8 @@ def melt( value_vars=typing.Sequence[str], var_names=typing.Sequence[typing.Hashable], value_name: typing.Hashable = "value", + *, + create_offsets_index: bool = True, ): """ Unpivot columns to produce longer, narrower dataframe. @@ -1666,20 +1681,31 @@ def melt( index_col_ids=var_col_ids, join_side="right", ) - index_id = guid.generate_guid() - unpivot_expr = unpivot_expr.promote_offsets(index_id) + + if create_offsets_index: + index_id = guid.generate_guid() + unpivot_expr = unpivot_expr.promote_offsets(index_id) + index_cols = [index_id] + else: + index_cols = [] + # Need to reorder to get id_vars before var_col and unpivot_col unpivot_expr = unpivot_expr.select_columns( - [index_id, *id_vars, *var_col_ids, unpivot_col_id] + [*index_cols, *id_vars, *var_col_ids, unpivot_col_id] ) return Block( unpivot_expr, column_labels=[*id_labels, *var_names, value_name], - index_columns=[index_id], + index_columns=index_cols, ) - def transpose(self, *, original_row_index: Optional[pd.Index] = None) -> Block: + def transpose( + self, + *, + original_row_index: Optional[pd.Index] = None, + single_row_mode: bool = False, + ) -> Block: """Transpose the block. Will fail if dtypes aren't coercible to a common type or too many rows. Can provide the original_row_index directly if it is already known, otherwise a query is needed. 
""" @@ -1705,7 +1731,11 @@ def transpose(self, *, original_row_index: Optional[pd.Index] = None) -> Block: block.column_labels, pd.Index(range(len(block.column_labels))) ) ) - numbered_block, offsets = numbered_block.promote_offsets() + # TODO: Determine if single row from expression tree (after aggregation without groupby) + if single_row_mode: + numbered_block, offsets = numbered_block.create_constant(0) + else: + numbered_block, offsets = numbered_block.promote_offsets() stacked_block = numbered_block.melt( id_vars=(offsets,), @@ -1714,6 +1744,7 @@ def transpose(self, *, original_row_index: Optional[pd.Index] = None) -> Block: "col_offset", ), value_vars=block.value_columns, + create_offsets_index=False, ) col_labels = stacked_block.value_columns[-2 - original_col_index.nlevels : -2] col_offset = stacked_block.value_columns[-2] # disambiguator we created earlier @@ -2339,12 +2370,19 @@ def _get_rows_as_json_values(self) -> Block: index_columns_count = len(self.index_columns) # column references to form the array of values for the row - column_references_csv = sql.csv( - [sql.cast_as_string(col) for col in self.expr.column_ids] - ) + column_types = list(self.index.dtypes) + list(self.dtypes) + column_references = [] + for type_, col in zip(column_types, self.expr.column_ids): + if isinstance(type_, pd.ArrowDtype) and pa.types.is_binary( + type_.pyarrow_dtype + ): + column_references.append(sql.to_json_string(col)) + else: + column_references.append(sql.cast_as_string(col)) + + column_references_csv = sql.csv(column_references) # types of the columns to serialize for the row - column_types = list(self.index.dtypes) + list(self.dtypes) column_types_csv = sql.csv( [sql.simple_literal(str(typ)) for typ in column_types] ) diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index fada4ebbd8..58973b10eb 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -22,10 +22,10 @@ import pandas as pd import bigframes.constants as constants +import bigframes.core.compile.ibis_types as compile_ibis_types import bigframes.core.compile.scalar_op_compiler as scalar_compilers import bigframes.core.expression as ex import bigframes.core.window_spec as window_spec -import bigframes.dtypes as dtypes import bigframes.operations.aggregations as agg_ops scalar_compiler = scalar_compilers.scalar_op_compiler @@ -323,7 +323,7 @@ def _( for this_bin in range(op.bins - 1): out = out.when( x <= (col_min + (this_bin + 1) * bin_width), - dtypes.literal_to_ibis_scalar( + compile_ibis_types.literal_to_ibis_scalar( this_bin, force_dtype=pd.Int64Dtype() ), ) @@ -352,8 +352,8 @@ def _( out = out.when(x.notnull(), interval_struct) else: # Interpret as intervals for interval in op.bins: - left = dtypes.literal_to_ibis_scalar(interval[0]) - right = dtypes.literal_to_ibis_scalar(interval[1]) + left = compile_ibis_types.literal_to_ibis_scalar(interval[0]) + right = compile_ibis_types.literal_to_ibis_scalar(interval[1]) condition = (x > left) & (x <= right) interval_struct = ibis.struct( {"left_exclusive": left, "right_inclusive": right} @@ -370,7 +370,7 @@ def _( window=None, ) -> ibis_types.IntegerValue: if isinstance(self.quantiles, int): - quantiles_ibis = dtypes.literal_to_ibis_scalar(self.quantiles) + quantiles_ibis = compile_ibis_types.literal_to_ibis_scalar(self.quantiles) percent_ranks = cast( ibis_types.FloatingColumn, _apply_window_if_present(column.percent_rank(), window), @@ -383,13 +383,19 @@ def _( 
_apply_window_if_present(column.percent_rank(), window), ) out = ibis.case() - first_ibis_quantile = dtypes.literal_to_ibis_scalar(self.quantiles[0]) + first_ibis_quantile = compile_ibis_types.literal_to_ibis_scalar( + self.quantiles[0] + ) out = out.when(percent_ranks < first_ibis_quantile, None) for bucket_n in range(len(self.quantiles) - 1): - ibis_quantile = dtypes.literal_to_ibis_scalar(self.quantiles[bucket_n + 1]) + ibis_quantile = compile_ibis_types.literal_to_ibis_scalar( + self.quantiles[bucket_n + 1] + ) out = out.when( percent_ranks <= ibis_quantile, - dtypes.literal_to_ibis_scalar(bucket_n, force_dtype=pd.Int64Dtype()), + compile_ibis_types.literal_to_ibis_scalar( + bucket_n, force_dtype=pd.Int64Dtype() + ), ) out = out.else_(None) return out.end() # type: ignore diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 552061f612..dac814a08c 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -28,6 +28,7 @@ import pandas import bigframes.core.compile.aggregate_compiler as agg_compiler +import bigframes.core.compile.ibis_types import bigframes.core.compile.scalar_op_compiler as op_compilers import bigframes.core.expression as ex import bigframes.core.guid @@ -157,16 +158,19 @@ def _get_ibis_column(self, key: str) -> ibis_types.Value: ) return typing.cast( ibis_types.Value, - bigframes.dtypes.ibis_value_to_canonical_type(self._column_names[key]), + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( + self._column_names[key] + ), ) def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: ibis_type = typing.cast( - bigframes.dtypes.IbisDtype, self._get_ibis_column(key).type() + bigframes.core.compile.ibis_types.IbisDtype, + self._get_ibis_column(key).type(), ) return typing.cast( bigframes.dtypes.Dtype, - bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type), + bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(ibis_type), ) def _aggregate_base( @@ -332,7 +336,8 @@ def _to_ibis_expr( # Make sure all dtypes are the "canonical" ones for BigFrames. This is # important for operations like UNION where the schema must match. table = self._table.select( - bigframes.dtypes.ibis_value_to_canonical_type(column) for column in columns + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(column) + for column in columns ) base_table = table if self._reduced_predicate is not None: @@ -579,7 +584,10 @@ def from_pandas( ibis_values = ibis_values.assign(**{ORDER_ID_COLUMN: range(len(pd_df))}) # derive the ibis schema from the original pandas schema ibis_schema = [ - (name, bigframes.dtypes.bigframes_dtype_to_ibis_dtype(dtype)) + ( + name, + bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype(dtype), + ) for name, dtype in zip(schema.names, schema.dtypes) ] ibis_schema.append((ORDER_ID_COLUMN, ibis_dtypes.int64)) @@ -993,7 +1001,9 @@ def _to_ibis_expr( # Make sure all dtypes are the "canonical" ones for BigFrames. This is # important for operations like UNION where the schema must match. 
table = table.select( - bigframes.dtypes.ibis_value_to_canonical_type(table[column]) + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( + table[column] + ) for column in table.columns ) base_table = table diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index f948d10a5b..021ec8b176 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -26,11 +26,11 @@ import bigframes.core.compile.compiled as compiled import bigframes.core.compile.concat as concat_impl import bigframes.core.compile.default_ordering as default_ordering +import bigframes.core.compile.ibis_types import bigframes.core.compile.schema_translator import bigframes.core.compile.single_column import bigframes.core.nodes as nodes import bigframes.core.ordering as bf_ordering -import bigframes.dtypes as bigframes_dtypes if typing.TYPE_CHECKING: import bigframes.core @@ -96,6 +96,48 @@ def compile_readlocal(node: nodes.ReadLocalNode, ordered: bool = True): return ordered_ir.to_unordered() +@_compile_node.register +def compile_cached_table(node: nodes.CachedTableNode, ordered: bool = True): + full_table_name = f"{node.project_id}.{node.dataset_id}.{node.table_id}" + used_columns = ( + *node.schema.names, + *node.hidden_columns, + ) + # Physical schema might include unused columns, unsupported datatypes like JSON + physical_schema = ibis.backends.bigquery.BigQuerySchema.to_ibis( + list(i for i in node.physical_schema if i.name in used_columns) + ) + ibis_table = ibis.table(physical_schema, full_table_name) + if ordered: + if node.ordering is None: + # If this happens, session malfunctioned while applying cached results. + raise ValueError( + "Cannot use unordered cached value. Result requires ordering information." + ) + return compiled.OrderedIR( + ibis_table, + columns=tuple( + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( + ibis_table[col] + ) + for col in node.schema.names + ), + ordering=node.ordering, + hidden_ordering_columns=[ibis_table[c] for c in node.hidden_columns], + ) + + else: + return compiled.UnorderedIR( + ibis_table, + columns=tuple( + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( + ibis_table[col] + ) + for col in node.schema.names + ), + ) + + @_compile_node.register def compile_readtable(node: nodes.ReadTableNode, ordered: bool = True): if ordered: @@ -133,7 +175,9 @@ def compile_read_table_unordered(node: nodes.ReadTableNode): return compiled.UnorderedIR( ibis_table, tuple( - bigframes_dtypes.ibis_value_to_canonical_type(ibis_table[col]) + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( + ibis_table[col] + ) for col in node.schema.names ), ) @@ -169,7 +213,9 @@ def compile_read_table_ordered(node: nodes.ReadTableNode): return compiled.OrderedIR( ibis_table, columns=tuple( - bigframes_dtypes.ibis_value_to_canonical_type(ibis_table[col]) + bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( + ibis_table[col] + ) for col in node.schema.names ), ordering=ordering, diff --git a/bigframes/core/compile/googlesql/__init__.py b/bigframes/core/compile/googlesql/__init__.py new file mode 100644 index 0000000000..32265c0d51 --- /dev/null +++ b/bigframes/core/compile/googlesql/__init__.py @@ -0,0 +1,52 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python classes representing GoogleSQL syntax nodes, adhering to the official syntax: +https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax""" + +from __future__ import annotations + +from bigframes.core.compile.googlesql.expression import ( + AliasExpression, + ColumnExpression, + CTEExpression, + StarExpression, + TableExpression, +) +from bigframes.core.compile.googlesql.query import ( + AsAlias, + FromClause, + FromItem, + NonRecursiveCTE, + QueryExpr, + Select, + SelectAll, + SelectExpression, +) + +__all__ = [ + "AliasExpression", + "AsAlias", + "ColumnExpression", + "CTEExpression", + "FromClause", + "FromItem", + "NonRecursiveCTE", + "QueryExpr", + "Select", + "SelectAll", + "SelectExpression", + "StarExpression", + "TableExpression", +] diff --git a/bigframes/core/compile/googlesql/abc.py b/bigframes/core/compile/googlesql/abc.py new file mode 100644 index 0000000000..081836467c --- /dev/null +++ b/bigframes/core/compile/googlesql/abc.py @@ -0,0 +1,25 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import abc + + +class SQLSyntax(abc.ABC): + """Abstract base class provides GoogleSQL syntax.""" + + @abc.abstractmethod + def sql(self): + ... diff --git a/bigframes/core/compile/googlesql/expression.py b/bigframes/core/compile/googlesql/expression.py new file mode 100644 index 0000000000..702aa2c5e5 --- /dev/null +++ b/bigframes/core/compile/googlesql/expression.py @@ -0,0 +1,95 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import dataclasses +import typing + +import bigframes.core.compile.googlesql.abc as abc + +"""This module represents GoogleSQL `expression` and its extensions. +Core class: + +* `expression`: Models basic SQL expressions. + +Extended classes (not part of standard GoogleSQL syntax, but added for convenience): + +* `ColumnExpression`: Represents column references. 
+* `TableExpression`: Represents table references. +* `AliasExpression`: Represents aliased expressions. +* ... +""" + + +@dataclasses.dataclass +class Expression(abc.SQLSyntax): + pass + + +@dataclasses.dataclass +class ColumnExpression(Expression): + name: str + parent: typing.Optional[TableExpression | AliasExpression | CTEExpression] = None + + def sql(self) -> str: + if self.parent is not None: + return f"{self.parent.sql()}.`{self.name}`" + return f"`{self.name}`" + + +@dataclasses.dataclass +class StarExpression(Expression): + parent: typing.Optional[TableExpression | AliasExpression | CTEExpression] = None + + def sql(self) -> str: + if self.parent is not None: + return f"{self.parent.sql()}.*" + return "*" + + +@dataclasses.dataclass +class TableExpression(Expression): + table_id: str + dataset_id: typing.Optional[str] = None + project_id: typing.Optional[str] = None + + def __post_init__(self): + if self.project_id is not None and self.dataset_id is None: + raise ValueError("The `dataset_id` is missing.") + + def sql(self) -> str: + text = [] + if self.project_id is not None: + text.append(f"`{self.project_id}`") + if self.dataset_id is not None: + text.append(f"`{self.dataset_id}`") + text.append(f"`{self.table_id}`") + return ".".join(text) + + +@dataclasses.dataclass +class AliasExpression(Expression): + alias: str + + def sql(self) -> str: + return f"`{self.alias}`" + + +@dataclasses.dataclass +class CTEExpression(Expression): + name: str + + def sql(self) -> str: + return f"`{self.name}`" diff --git a/bigframes/core/compile/googlesql/query.py b/bigframes/core/compile/googlesql/query.py new file mode 100644 index 0000000000..6210aa67f4 --- /dev/null +++ b/bigframes/core/compile/googlesql/query.py @@ -0,0 +1,165 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import dataclasses +import typing + +import bigframes.core.compile.googlesql.abc as abc +import bigframes.core.compile.googlesql.expression as expr + +"""This module provides a structured representation of GoogleSQL syntax using nodes. 
+Each node's name and child nodes are designed to strictly follow the official GoogleSQL +syntax rules outlined in the documentation: +https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax""" + + +@dataclasses.dataclass +class QueryExpr(abc.SQLSyntax): + """This class represents GoogleSQL `query_expr` syntax.""" + + select: Select + with_cte_list: typing.Sequence[NonRecursiveCTE] = () + + def sql(self) -> str: + text = [] + if len(self.with_cte_list) > 0: + with_cte_text = ",\n".join( + [with_cte.sql() for with_cte in self.with_cte_list] + ) + text.append(f"WITH {with_cte_text}") + + text.append(self.select.sql()) + return "\n".join(text) + + +@dataclasses.dataclass +class Select(abc.SQLSyntax): + """This class represents GoogleSQL `select` syntax.""" + + select_list: typing.Sequence[typing.Union[SelectExpression, SelectAll]] + from_clause_list: typing.Sequence[FromClause] = () + + def sql(self) -> str: + text = ["SELECT"] + + select_list_sql = ",\n".join([select.sql() for select in self.select_list]) + text.append(select_list_sql) + + if self.from_clause_list is not None: + from_clauses_sql = ",\n".join( + [clause.sql() for clause in self.from_clause_list] + ) + text.append(f"FROM\n{from_clauses_sql}") + return "\n".join(text) + + +@dataclasses.dataclass +class SelectExpression(abc.SQLSyntax): + """This class represents `select_expression`.""" + + expression: expr.ColumnExpression + alias: typing.Optional[expr.AliasExpression] = None + + def sql(self) -> str: + if self.alias is None: + return self.expression.sql() + else: + return f"{self.expression.sql()} AS {self.alias.sql()}" + + +@dataclasses.dataclass +class SelectAll(abc.SQLSyntax): + """This class represents `select_all` (aka. `SELECT *`).""" + + expression: expr.StarExpression + + def sql(self) -> str: + return self.expression.sql() + + +@dataclasses.dataclass +class FromClause(abc.SQLSyntax): + """This class represents GoogleSQL `from_clause` syntax.""" + + from_item: FromItem + + def sql(self) -> str: + return self.from_item.sql() + + +@dataclasses.dataclass +class FromItem(abc.SQLSyntax): + """This class represents GoogleSQL `from_item` syntax.""" + + table_name: typing.Optional[expr.TableExpression] = None + # Note: Temporarily introduces the `str` type to interact with pre-existing, + # compiled SQL strings. 
+ query_expr: typing.Optional[QueryExpr | str] = None + cte_name: typing.Optional[expr.CTEExpression] = None + as_alias: typing.Optional[AsAlias] = None + + def __post_init__(self): + non_none = sum( + expr is not None + for expr in [ + self.table_name, + self.query_expr, + self.cte_name, + ] + ) + if non_none != 1: + raise ValueError("Exactly one of expressions must be provided.") + + def sql(self) -> str: + if self.table_name is not None: + text = self.table_name.sql() + elif self.query_expr is not None: + text = ( + self.query_expr + if isinstance(self.query_expr, str) + else self.query_expr.sql() + ) + text = f"({text})" + elif self.cte_name is not None: + text = self.cte_name.sql() + else: + raise ValueError("One of from items must be provided.") + + if self.as_alias is None: + return text + else: + return f"{text} {self.as_alias.sql()}" + + +@dataclasses.dataclass +class NonRecursiveCTE(abc.SQLSyntax): + """This class represents GoogleSQL `non_recursive_cte` syntax.""" + + cte_name: expr.CTEExpression + query_expr: QueryExpr + + def sql(self) -> str: + return f"{self.cte_name.sql()} AS (\n{self.query_expr.sql()}\n)" + + +@dataclasses.dataclass +class AsAlias(abc.SQLSyntax): + """This class represents GoogleSQL `as_alias` syntax.""" + + alias: expr.AliasExpression + + def sql(self) -> str: + return f"AS {self.alias.sql()}" diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py new file mode 100644 index 0000000000..f73fce3e4d --- /dev/null +++ b/bigframes/core/compile/ibis_types.py @@ -0,0 +1,476 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
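The new bigframes/core/compile/googlesql package above builds SQL strings by composing small dataclasses, each of which renders itself through a sql() method. The sketch below is illustrative only and not part of the patch: the table, dataset, and column names are placeholders, and the whitespace of the emitted SQL may differ from the single-line comments shown. It simply exercises the classes exported from that package as defined in this diff.

import bigframes.core.compile.googlesql as googlesql

# Inner query, roughly: SELECT * FROM `proj`.`ds`.`tbl`  (placeholder names)
inner = googlesql.QueryExpr(
    select=googlesql.Select(
        select_list=[googlesql.SelectAll(googlesql.StarExpression())],
        from_clause_list=[
            googlesql.FromClause(
                googlesql.FromItem(
                    table_name=googlesql.TableExpression(
                        table_id="tbl", dataset_id="ds", project_id="proj"
                    )
                )
            )
        ],
    )
)

# Outer query, roughly: WITH `t` AS (<inner>) SELECT `col` AS `c` FROM `t`
query = googlesql.QueryExpr(
    select=googlesql.Select(
        select_list=[
            googlesql.SelectExpression(
                expression=googlesql.ColumnExpression("col"),
                alias=googlesql.AliasExpression("c"),
            )
        ],
        from_clause_list=[
            googlesql.FromClause(
                googlesql.FromItem(cte_name=googlesql.CTEExpression("t"))
            )
        ],
    ),
    with_cte_list=[
        googlesql.NonRecursiveCTE(
            cte_name=googlesql.CTEExpression("t"), query_expr=inner
        )
    ],
)

print(query.sql())

Note that FromItem.__post_init__ requires exactly one of table_name, query_expr, or cte_name, so each FromItem above supplies a single source; FromItem.query_expr also accepts a plain str so that pre-compiled SQL text can be wrapped as a subquery.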
+from __future__ import annotations + +import textwrap +from typing import Any, cast, Dict, Iterable, Optional, Tuple, Union +import warnings + +import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes +import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops +import geopandas as gpd # type: ignore +import google.cloud.bigquery as bigquery +import ibis +import ibis.expr.datatypes as ibis_dtypes +from ibis.expr.datatypes.core import dtype as python_type_to_bigquery_type +import ibis.expr.types as ibis_types +import numpy as np +import pandas as pd +import pyarrow as pa + +import bigframes.constants as constants +import bigframes.dtypes + +# Type hints for Ibis data types supported by BigQuery DataFrame +IbisDtype = Union[ + ibis_dtypes.Boolean, + ibis_dtypes.Float64, + ibis_dtypes.Int64, + ibis_dtypes.String, + ibis_dtypes.Date, + ibis_dtypes.Time, + ibis_dtypes.Timestamp, +] + + +BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, bigframes.dtypes.Dtype]] = ( + (ibis_dtypes.boolean, pd.BooleanDtype()), + (ibis_dtypes.date, pd.ArrowDtype(pa.date32())), + (ibis_dtypes.float64, pd.Float64Dtype()), + (ibis_dtypes.int64, pd.Int64Dtype()), + (ibis_dtypes.string, pd.StringDtype(storage="pyarrow")), + (ibis_dtypes.time, pd.ArrowDtype(pa.time64("us"))), + (ibis_dtypes.Timestamp(timezone=None), pd.ArrowDtype(pa.timestamp("us"))), + ( + ibis_dtypes.Timestamp(timezone="UTC"), + pd.ArrowDtype(pa.timestamp("us", tz="UTC")), + ), + (ibis_dtypes.binary, pd.ArrowDtype(pa.binary())), + ( + ibis_dtypes.Decimal(precision=38, scale=9, nullable=True), + pd.ArrowDtype(pa.decimal128(38, 9)), + ), + ( + ibis_dtypes.Decimal(precision=76, scale=38, nullable=True), + pd.ArrowDtype(pa.decimal256(76, 38)), + ), + ( + ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True), + gpd.array.GeometryDtype(), + ), +) + +BIGFRAMES_TO_IBIS: Dict[bigframes.dtypes.Dtype, ibis_dtypes.DataType] = { + pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS +} + +IBIS_TO_ARROW: Dict[ibis_dtypes.DataType, pa.DataType] = { + ibis_dtypes.boolean: pa.bool_(), + ibis_dtypes.date: pa.date32(), + ibis_dtypes.float64: pa.float64(), + ibis_dtypes.int64: pa.int64(), + ibis_dtypes.string: pa.string(), + ibis_dtypes.time: pa.time64("us"), + ibis_dtypes.Timestamp(timezone=None): pa.timestamp("us"), + ibis_dtypes.Timestamp(timezone="UTC"): pa.timestamp("us", tz="UTC"), + ibis_dtypes.binary: pa.binary(), + ibis_dtypes.Decimal(precision=38, scale=9, nullable=True): pa.decimal128(38, 9), + ibis_dtypes.Decimal(precision=76, scale=38, nullable=True): pa.decimal256(76, 38), +} + +ARROW_TO_IBIS = {arrow: ibis for ibis, arrow in IBIS_TO_ARROW.items()} + +IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, bigframes.dtypes.Dtype] = { + ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS +} +# Allow REQUIRED fields to map correctly. 
+IBIS_TO_BIGFRAMES.update( + {ibis.copy(nullable=False): pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS} +) +IBIS_TO_BIGFRAMES.update( + { + # TODO: Interval + } +) + + +def cast_ibis_value( + value: ibis_types.Value, to_type: ibis_dtypes.DataType +) -> ibis_types.Value: + """Perform compatible type casts of ibis values + + Args: + value: + Ibis value, which could be a literal, scalar, or column + + to_type: + The Ibis type to cast to + + Returns: + A new Ibis value of type to_type + + Raises: + TypeError: if the type cast cannot be executed""" + if value.type() == to_type: + return value + # casts that just work + # TODO(bmil): add to this as more casts are verified + good_casts = { + ibis_dtypes.bool: (ibis_dtypes.int64,), + ibis_dtypes.int64: ( + ibis_dtypes.bool, + ibis_dtypes.float64, + ibis_dtypes.string, + ibis_dtypes.Decimal(precision=38, scale=9), + ibis_dtypes.Decimal(precision=76, scale=38), + ibis_dtypes.time, + ibis_dtypes.timestamp, + ibis_dtypes.Timestamp(timezone="UTC"), + ), + ibis_dtypes.float64: ( + ibis_dtypes.string, + ibis_dtypes.int64, + ibis_dtypes.Decimal(precision=38, scale=9), + ibis_dtypes.Decimal(precision=76, scale=38), + ), + ibis_dtypes.string: ( + ibis_dtypes.int64, + ibis_dtypes.float64, + ibis_dtypes.Decimal(precision=38, scale=9), + ibis_dtypes.Decimal(precision=76, scale=38), + ibis_dtypes.binary, + ibis_dtypes.date, + ibis_dtypes.timestamp, + ibis_dtypes.Timestamp(timezone="UTC"), + ), + ibis_dtypes.date: ( + ibis_dtypes.string, + ibis_dtypes.timestamp, + ibis_dtypes.Timestamp(timezone="UTC"), + ), + ibis_dtypes.Decimal(precision=38, scale=9): ( + ibis_dtypes.float64, + ibis_dtypes.Decimal(precision=76, scale=38), + ), + ibis_dtypes.Decimal(precision=76, scale=38): ( + ibis_dtypes.float64, + ibis_dtypes.Decimal(precision=38, scale=9), + ), + ibis_dtypes.time: ( + ibis_dtypes.int64, + ibis_dtypes.string, + ), + ibis_dtypes.timestamp: ( + ibis_dtypes.date, + ibis_dtypes.int64, + ibis_dtypes.string, + ibis_dtypes.time, + ibis_dtypes.Timestamp(timezone="UTC"), + ), + ibis_dtypes.Timestamp(timezone="UTC"): ( + ibis_dtypes.date, + ibis_dtypes.int64, + ibis_dtypes.string, + ibis_dtypes.time, + ibis_dtypes.timestamp, + ), + ibis_dtypes.binary: (ibis_dtypes.string,), + } + + value = ibis_value_to_canonical_type(value) + if value.type() in good_casts: + if to_type in good_casts[value.type()]: + return value.cast(to_type) + else: + # this should never happen + raise TypeError( + f"Unexpected value type {value.type()}. {constants.FEEDBACK_LINK}" + ) + + # casts that need some encouragement + + # BigQuery casts bools to lower case strings. Capitalize the result to match Pandas + # TODO(bmil): remove this workaround after fixing Ibis + if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.string: + return cast(ibis_types.StringValue, value.cast(to_type)).capitalize() + + if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.float64: + return value.cast(ibis_dtypes.int64).cast(ibis_dtypes.float64) + + if value.type() == ibis_dtypes.float64 and to_type == ibis_dtypes.bool: + return value != ibis_types.literal(0) + + raise TypeError( + f"Unsupported cast {value.type()} to {to_type}. {constants.FEEDBACK_LINK}" + ) + + +def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value: + """Converts an Ibis expression to canonical type. + + This is useful in cases where multiple types correspond to the same BigFrames dtype. 
+ """ + ibis_type = value.type() + name = value.get_name() + if ibis_type.is_json(): + value = vendored_ibis_ops.ToJsonString(value).to_expr() + return value.name(name) + # Allow REQUIRED fields to be joined with NULLABLE fields. + nullable_type = ibis_type.copy(nullable=True) + return value.cast(nullable_type).name(name) + + +def bigframes_dtype_to_ibis_dtype( + bigframes_dtype: Union[ + bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype, np.dtype[Any] + ] +) -> ibis_dtypes.DataType: + """Converts a BigQuery DataFrames supported dtype to an Ibis dtype. + + Args: + bigframes_dtype: + A dtype supported by BigQuery DataFrame + + Returns: + IbisDtype: The corresponding Ibis type + + Raises: + ValueError: If passed a dtype not supported by BigQuery DataFrames. + """ + if isinstance(bigframes_dtype, pd.ArrowDtype): + return _arrow_dtype_to_ibis_dtype(bigframes_dtype.pyarrow_dtype) + + type_string = str(bigframes_dtype) + if type_string in bigframes.dtypes.BIGFRAMES_STRING_TO_BIGFRAMES: + bigframes_dtype = bigframes.dtypes.BIGFRAMES_STRING_TO_BIGFRAMES[ + cast(bigframes.dtypes.DtypeString, type_string) + ] + else: + raise ValueError( + textwrap.dedent( + f""" + Unexpected data type {bigframes_dtype}. The following + str dtypes are supppted: 'boolean','Float64','Int64', + 'int64[pyarrow]','string','string[pyarrow]', + 'timestamp[us, tz=UTC][pyarrow]','timestamp[us][pyarrow]', + 'date32[day][pyarrow]','time64[us][pyarrow]'. + The following pandas.ExtensionDtype are supported: + pandas.BooleanDtype(), pandas.Float64Dtype(), + pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"), + pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")), + pd.ArrowDtype(pa.timestamp("us")), + pd.ArrowDtype(pa.timestamp("us", tz="UTC")). + {constants.FEEDBACK_LINK} + """ + ) + ) + + return BIGFRAMES_TO_IBIS[bigframes_dtype] + + +def ibis_dtype_to_bigframes_dtype( + ibis_dtype: ibis_dtypes.DataType, +) -> bigframes.dtypes.Dtype: + """Converts an Ibis dtype to a BigQuery DataFrames dtype + + Args: + ibis_dtype: The ibis dtype used to represent this type, which + should in turn correspond to an underlying BigQuery type + + Returns: + The supported BigQuery DataFrames dtype, which may be provided by + pandas, numpy, or db_types + + Raises: + ValueError: if passed an unexpected type + """ + # Special cases: Ibis supports variations on these types, but currently + # our IO returns them as objects. Eventually, we should support them as + # ArrowDType (and update the IO accordingly) + if isinstance(ibis_dtype, ibis_dtypes.Array): + return pd.ArrowDtype(_ibis_dtype_to_arrow_dtype(ibis_dtype)) + + if isinstance(ibis_dtype, ibis_dtypes.Struct): + return pd.ArrowDtype(_ibis_dtype_to_arrow_dtype(ibis_dtype)) + + # BigQuery only supports integers of size 64 bits. + if isinstance(ibis_dtype, ibis_dtypes.Integer): + return pd.Int64Dtype() + + # Temporary: Will eventually support an explicit json type instead of casting to string. + if isinstance(ibis_dtype, ibis_dtypes.JSON): + warnings.warn( + "Interpreting JSON as string. 
This behavior may change in future versions.", + bigframes.exceptions.PreviewWarning, + ) + return bigframes.dtypes.STRING_DTYPE + + if ibis_dtype in IBIS_TO_BIGFRAMES: + return IBIS_TO_BIGFRAMES[ibis_dtype] + elif isinstance(ibis_dtype, ibis_dtypes.Decimal): + # Temporary workaround for ibis decimal issue (b/323387826) + if ibis_dtype.precision >= 76: + return pd.ArrowDtype(pa.decimal256(76, 38)) + else: + return pd.ArrowDtype(pa.decimal128(38, 9)) + elif isinstance(ibis_dtype, ibis_dtypes.Null): + # Fallback to STRING for NULL values for most flexibility in SQL. + return IBIS_TO_BIGFRAMES[ibis_dtypes.string] + else: + raise ValueError( + f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}" + ) + + +def _ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType: + """Private utility to convert ibis dtype to equivalent arrow type.""" + if isinstance(ibis_dtype, ibis_dtypes.Array): + return pa.list_( + _ibis_dtype_to_arrow_dtype(ibis_dtype.value_type.copy(nullable=True)) + ) + + if isinstance(ibis_dtype, ibis_dtypes.Struct): + return pa.struct( + [ + (name, _ibis_dtype_to_arrow_dtype(dtype)) + for name, dtype in ibis_dtype.fields.items() + ] + ) + + if ibis_dtype in IBIS_TO_ARROW: + return IBIS_TO_ARROW[ibis_dtype] + else: + raise ValueError( + f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}" + ) + + +def _arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType: + if pa.types.is_struct(arrow_dtype): + struct_dtype = cast(pa.StructType, arrow_dtype) + return ibis_dtypes.Struct.from_tuples( + [ + (field.name, _arrow_dtype_to_ibis_dtype(field.type)) + for field in struct_dtype + ] + ) + + if arrow_dtype in ARROW_TO_IBIS: + return ARROW_TO_IBIS[arrow_dtype] + if arrow_dtype == pa.null(): + # Used for empty local dataframes where pyarrow has null type + return ibis_dtypes.float64 + else: + raise ValueError( + f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}" + ) + + +def literal_to_ibis_scalar( + literal, force_dtype: Optional[bigframes.dtypes.Dtype] = None, validate: bool = True +): + """Accept any literal and, if possible, return an Ibis Scalar + expression with a BigQuery DataFrames compatible data type + + Args: + literal: + any value accepted by Ibis + force_dtype: + force the value to a specific dtype + validate: + If true, will raise ValueError if type cannot be stored in a + BigQuery DataFrames object. If used as a subexpression, this should + be disabled. + + Returns: + An ibis Scalar supported by BigQuery DataFrame + + Raises: + ValueError: if passed literal cannot be coerced to a + BigQuery DataFrames compatible scalar + """ + # Special case: Can create nulls for non-bidirectional types + if (force_dtype == gpd.array.GeometryDtype()) and pd.isna(literal): + # Ibis has bug for casting nulltype to geospatial, so we perform intermediate cast first + geotype = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True) + return ibis.literal(None, geotype) + ibis_dtype = BIGFRAMES_TO_IBIS[force_dtype] if force_dtype else None + + if pd.api.types.is_list_like(literal): + if validate: + raise ValueError( + f"List types can't be stored in BigQuery DataFrames. 
{constants.FEEDBACK_LINK}" + ) + # "correct" way would be to use ibis.array, but this produces invalid BQ SQL syntax + return tuple(literal) + if not pd.api.types.is_list_like(literal) and pd.isna(literal): + if ibis_dtype: + return ibis.null().cast(ibis_dtype) + else: + return ibis.null() + + scalar_expr = ibis.literal(literal) + if ibis_dtype: + scalar_expr = ibis.literal(literal, ibis_dtype) + elif scalar_expr.type().is_floating(): + scalar_expr = ibis.literal(literal, ibis_dtypes.float64) + elif scalar_expr.type().is_integer(): + scalar_expr = ibis.literal(literal, ibis_dtypes.int64) + elif scalar_expr.type().is_decimal(): + precision = scalar_expr.type().precision + scale = scalar_expr.type().scale + if (not precision and not scale) or ( + precision and scale and scale <= 9 and precision + (9 - scale) <= 38 + ): + scalar_expr = ibis.literal( + literal, ibis_dtypes.decimal(precision=38, scale=9) + ) + elif precision and scale and scale <= 38 and precision + (38 - scale) <= 76: + scalar_expr = ibis.literal( + literal, ibis_dtypes.decimal(precision=76, scale=38) + ) + else: + raise TypeError( + "BigQuery's decimal data type supports a maximum precision of 76 and a maximum scale of 38." + f"Current precision: {precision}. Current scale: {scale}" + ) + + # TODO(bmil): support other literals that can be coerced to compatible types + if validate and (scalar_expr.type() not in BIGFRAMES_TO_IBIS.values()): + raise ValueError( + f"Literal did not coerce to a supported data type: {scalar_expr.type()}. {constants.FEEDBACK_LINK}" + ) + + return scalar_expr + + +class UnsupportedTypeError(ValueError): + def __init__(self, type_, supported_types): + self.type = type_ + self.supported_types = supported_types + + +def ibis_type_from_python_type(t: type) -> ibis_dtypes.DataType: + if t not in bigframes.dtypes.RF_SUPPORTED_IO_PYTHON_TYPES: + raise UnsupportedTypeError(t, bigframes.dtypes.RF_SUPPORTED_IO_PYTHON_TYPES) + return python_type_to_bigquery_type(t) + + +def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType: + """Convert bq type to ibis. 
Only to be used for remote functions, does not handle all types.""" + if tk not in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS: + raise UnsupportedTypeError( + tk, bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS + ) + return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 000c4a4c09..6b8e60434e 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -27,6 +27,7 @@ import pandas as pd import bigframes.constants as constants +import bigframes.core.compile.ibis_types import bigframes.core.expression as ex import bigframes.dtypes import bigframes.operations as ops @@ -78,7 +79,7 @@ def _( expression: ex.ScalarConstantExpression, bindings: typing.Dict[str, ibis_types.Value], ) -> ibis_types.Value: - return bigframes.dtypes.literal_to_ibis_scalar( + return bigframes.core.compile.ibis_types.literal_to_ibis_scalar( expression.value, expression.dtype ) @@ -771,14 +772,16 @@ def numeric_to_datetime(x: ibis_types.Value, unit: str) -> ibis_types.TimestampV @scalar_op_compiler.register_unary_op(ops.AsTypeOp, pass_op=True) def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp): - to_type = bigframes.dtypes.bigframes_dtype_to_ibis_dtype(op.to_type) + to_type = bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + op.to_type + ) if isinstance(x, ibis_types.NullScalar): return ibis_types.null().cast(to_type) # When casting DATETIME column into INT column, we need to convert the column into TIMESTAMP first. if to_type == ibis_dtypes.int64 and x.type() == ibis_dtypes.timestamp: x_converted = x.cast(ibis_dtypes.Timestamp(timezone="UTC")) - return bigframes.dtypes.cast_ibis_value(x_converted, to_type) + return bigframes.core.compile.ibis_types.cast_ibis_value(x_converted, to_type) if to_type == ibis_dtypes.int64 and x.type() == ibis_dtypes.time: # The conversion unit is set to "us" (microseconds) for consistency @@ -798,7 +801,7 @@ def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp): elif to_type == ibis_dtypes.time: return x_converted.time() - return bigframes.dtypes.cast_ibis_value(x, to_type) + return bigframes.core.compile.ibis_types.cast_ibis_value(x, to_type) @scalar_op_compiler.register_unary_op(ops.IsInOp, pass_op=True) @@ -1013,15 +1016,7 @@ def add_op( ): if isinstance(x, ibis_types.NullScalar) or isinstance(x, ibis_types.NullScalar): return ibis.null() - try: - # Could be string concatenation or numeric addition. - return x + y # type: ignore - except ibis.common.annotations.SignatureValidationError as exc: - left_type = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(x.type()) - right_type = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(y.type()) - raise TypeError( - f"Cannot add {repr(left_type)} and {repr(right_type)}. 
{constants.FEEDBACK_LINK}" - ) from exc + return x + y # type: ignore @scalar_op_compiler.register_binary_op(ops.sub_op) diff --git a/bigframes/core/compile/schema_translator.py b/bigframes/core/compile/schema_translator.py index 03e9691af6..d19c1bfb86 100644 --- a/bigframes/core/compile/schema_translator.py +++ b/bigframes/core/compile/schema_translator.py @@ -18,6 +18,7 @@ import ibis import ibis.expr.schema +import bigframes.core.compile.ibis_types import bigframes.core.schema as bf_schema import bigframes.dtypes @@ -28,7 +29,7 @@ def convert_bf_schema(schema: bf_schema.ArraySchema) -> ibis.expr.schema.Schema: """ names = schema.names types = [ - bigframes.dtypes.bigframes_dtype_to_ibis_dtype(bf_type) + bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype(bf_type) for bf_type in schema.dtypes ] return ibis.schema(names=names, types=types) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index e40e20b0cb..0e5082447a 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -90,9 +90,12 @@ def __new__( # TODO: Support more index subtypes from bigframes.core.indexes.multi import MultiIndex - klass = MultiIndex if len(block._index_columns) > 1 else cls - # TODO(b/340893286): fix type error - result = typing.cast(Index, object.__new__(klass)) # type: ignore + if len(block._index_columns) <= 1: + klass = cls + else: + klass = MultiIndex + + result = typing.cast(Index, object.__new__(klass)) result._query_job = None result._block = block block.session._register_object(result) @@ -161,7 +164,8 @@ def dtype(self): @property def dtypes(self) -> pandas.Series: return pandas.Series( - data=self._block.index.dtypes, index=self._block.index.names # type:ignore + data=self._block.index.dtypes, + index=typing.cast(typing.Tuple, self._block.index.names), ) @property @@ -408,10 +412,10 @@ def drop( block = block.drop_columns([condition_id]) return Index(block) - def dropna(self, how: str = "any") -> Index: + def dropna(self, how: typing.Literal["all", "any"] = "any") -> Index: if how not in ("any", "all"): raise ValueError("'how' must be one of 'any', 'all'") - result = block_ops.dropna(self._block, self._block.index_columns, how=how) # type: ignore + result = block_ops.dropna(self._block, self._block.index_columns, how=how) return Index(result) def drop_duplicates(self, *, keep: str = "first") -> Index: diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py index 8b256be6d2..ac658d1bb8 100644 --- a/bigframes/core/local_data.py +++ b/bigframes/core/local_data.py @@ -46,8 +46,8 @@ def adapt_pa_table(arrow_table: pa.Table) -> pa.Table: def bigframes_type_for_arrow_type(pa_type: pa.DataType) -> bigframes.dtypes.Dtype: - return bigframes.dtypes.ibis_dtype_to_bigframes_dtype( - bigframes.dtypes.arrow_dtype_to_ibis_dtype(arrow_type_replacements(pa_type)) + return bigframes.dtypes.arrow_dtype_to_bigframes_dtype( + arrow_type_replacements(pa_type) ) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 1af7c5bd17..077a362ba0 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -324,10 +324,12 @@ def roots(self) -> typing.Set[BigFrameNode]: @functools.cached_property def schema(self) -> schemata.ArraySchema: + from bigframes.core.compile.ibis_types import ibis_dtype_to_bigframes_dtype + items = tuple( schemata.SchemaItem( value.get_name(), - bigframes.dtypes.ibis_dtype_to_bigframes_dtype(value.type()), + ibis_dtype_to_bigframes_dtype(value.type()), ) for value in self.columns ) @@ -376,7 +378,7 @@ 
def __post_init__(self): raise ValueError( f"Requested schema {self.columns} cannot be derived from table schemal {self.physical_schema}" ) - if self.order_col_is_sequential and len(self.total_order_cols) == 1: + if self.order_col_is_sequential and len(self.total_order_cols) != 1: raise ValueError("Sequential primary key must have only one component") @property @@ -409,6 +411,56 @@ def transform_children( return self +# This node shouldn't be used in the "original" expression tree, only used as replacement for original during planning +@dataclass(frozen=True) +class CachedTableNode(BigFrameNode): + # The original BFET subtree that was cached + # note: this isn't a "child" node. + original_node: BigFrameNode = field() + # reference to cached materialization of original_node + project_id: str = field() + dataset_id: str = field() + table_id: str = field() + physical_schema: Tuple[bq.SchemaField, ...] = field() + + ordering: typing.Optional[orderings.ExpressionOrdering] = field() + + @property + def session(self): + return self.original_node.session + + def __hash__(self): + return self._node_hash + + @property + def roots(self) -> typing.Set[BigFrameNode]: + return {self} + + @property + def schema(self) -> schemata.ArraySchema: + return self.original_node.schema + + @functools.cached_property + def variables_introduced(self) -> int: + return len(self.schema.items) + OVERHEAD_VARIABLES + + @property + def hidden_columns(self) -> typing.Tuple[str, ...]: + """Physical columns used to define ordering but not directly exposed as value columns.""" + if self.ordering is None: + return () + return tuple( + col + for col in sorted(self.ordering.referenced_columns) + if col not in self.schema.names + ) + + def transform_children( + self, t: Callable[[BigFrameNode], BigFrameNode] + ) -> BigFrameNode: + return self + + # Unary nodes @dataclass(frozen=True) class PromoteOffsetsNode(UnaryNode): diff --git a/bigframes/core/ordering.py b/bigframes/core/ordering.py index 9009e31be3..1562592720 100644 --- a/bigframes/core/ordering.py +++ b/bigframes/core/ordering.py @@ -215,6 +215,14 @@ def is_sequential(self) -> bool: def all_ordering_columns(self) -> Sequence[OrderingExpression]: return list(self.ordering_value_columns) + @property + def referenced_columns(self) -> Set[str]: + return set( + col + for part in self.ordering_value_columns + for col in part.scalar_expression.unbound_variables + ) + def encode_order_string( order_id: ibis_types.IntegerColumn, length: int = DEFAULT_ORDERING_ID_LENGTH diff --git a/bigframes/core/reshape/__init__.py b/bigframes/core/reshape/__init__.py index 05cb5c7e94..a23461bdb9 100644 --- a/bigframes/core/reshape/__init__.py +++ b/bigframes/core/reshape/__init__.py @@ -116,7 +116,7 @@ def cut( Iterable, ], *, - labels: Optional[bool] = None, + labels: Union[Iterable[str], bool, None] = None, ) -> bigframes.series.Series: if isinstance(bins, int) and bins <= 0: raise ValueError("`bins` should be a positive integer.") diff --git a/bigframes/core/sql.py b/bigframes/core/sql.py index c1e319b860..01dcebad6e 100644 --- a/bigframes/core/sql.py +++ b/bigframes/core/sql.py @@ -20,7 +20,7 @@ import datetime import math import textwrap -from typing import Iterable, TYPE_CHECKING +from typing import Iterable, Mapping, TYPE_CHECKING, Union # Literals and identifiers matching this pattern can be unquoted unquoted = r"^[A-Za-z_][A-Za-z_0-9]*$" @@ -96,6 +96,12 @@ def cast_as_string(column_name: str) -> str: return f"CAST({identifier(column_name)} AS STRING)" +def to_json_string(column_name: 
str) -> str: + """Return a string representing JSON version of a column.""" + + return f"TO_JSON_STRING({identifier(column_name)})" + + def csv(values: Iterable[str]) -> str: """Return a string of comma separated values.""" return ", ".join(values) @@ -169,3 +175,47 @@ def ordering_clause( part = f"`{ordering_expr.id}` {asc_desc} {null_clause}" parts.append(part) return f"ORDER BY {' ,'.join(parts)}" + + +def create_vector_search_sql( + sql_string: str, + options: Mapping[str, Union[str | int | bool | float]] = {}, +) -> str: + """Encode the VECTOR SEARCH statement for BigQuery Vector Search.""" + + base_table = options["base_table"] + column_to_search = options["column_to_search"] + distance_type = options["distance_type"] + top_k = options["top_k"] + query_column_to_search = options.get("query_column_to_search", None) + + if query_column_to_search is not None: + query_str = f""" + SELECT + query.*, + base.*, + distance, + FROM VECTOR_SEARCH( + TABLE `{base_table}`, + {simple_literal(column_to_search)}, + ({sql_string}), + {simple_literal(query_column_to_search)}, + distance_type => {simple_literal(distance_type)}, + top_k => {simple_literal(top_k)} + ) + """ + else: + query_str = f""" + SELECT + query.*, + base.*, + distance, + FROM VECTOR_SEARCH( + TABLE `{base_table}`, + {simple_literal(column_to_search)}, + ({sql_string}), + distance_type => {simple_literal(distance_type)}, + top_k => {simple_literal(top_k)} + ) + """ + return query_str diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 5eac4cceb9..5d8d8c9685 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -28,7 +28,7 @@ def to_datetime( arg: Union[ - vendored_pandas_datetimes.local_scalars, + Union[int, float, str, datetime], vendored_pandas_datetimes.local_iterables, bigframes.series.Series, bigframes.dataframe.DataFrame, diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index e404e439ab..f12c346776 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1215,10 +1215,30 @@ def to_pandas( self._set_internal_query_job(query_job) return df.set_axis(self._block.column_labels, axis=1, copy=False) - def to_pandas_batches(self) -> Iterable[pandas.DataFrame]: - """Stream DataFrame results to an iterable of pandas DataFrame""" + def to_pandas_batches( + self, page_size: Optional[int] = None, max_results: Optional[int] = None + ) -> Iterable[pandas.DataFrame]: + """Stream DataFrame results to an iterable of pandas DataFrame. + + page_size and max_results determine the size and number of batches, + see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result + + Args: + page_size (int, default None): + The size of each batch. + max_results (int, default None): + If given, only download this many rows at maximum. + + Returns: + Iterable[pandas.DataFrame]: + An iterable of smaller dataframes which combine to + form the original dataframe. 
Results stream from bigquery, + see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable + """ self._optimize_query_complexity() - return self._block.to_pandas_batches() + return self._block.to_pandas_batches( + page_size=page_size, max_results=max_results + ) def _compute_dry_run(self) -> bigquery.QueryJob: return self._block._compute_dry_run() @@ -3313,22 +3333,43 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): # Early check whether the dataframe dtypes are currently supported # in the remote function # NOTE: Keep in sync with the value converters used in the gcf code - # generated in generate_cloud_function_main_code in remote_function.py + # generated in remote_function_template.py remote_function_supported_dtypes = ( bigframes.dtypes.INT_DTYPE, bigframes.dtypes.FLOAT_DTYPE, bigframes.dtypes.BOOL_DTYPE, + bigframes.dtypes.BYTES_DTYPE, bigframes.dtypes.STRING_DTYPE, ) supported_dtypes_types = tuple( - type(dtype) for dtype in remote_function_supported_dtypes + type(dtype) + for dtype in remote_function_supported_dtypes + if not isinstance(dtype, pandas.ArrowDtype) + ) + # Check ArrowDtype separately since multiple BigQuery types map to + # ArrowDtype, including BYTES and TIMESTAMP. + supported_arrow_types = tuple( + dtype.pyarrow_dtype + for dtype in remote_function_supported_dtypes + if isinstance(dtype, pandas.ArrowDtype) ) supported_dtypes_hints = tuple( str(dtype) for dtype in remote_function_supported_dtypes ) for dtype in self.dtypes: - if not isinstance(dtype, supported_dtypes_types): + if ( + # Not one of the pandas/numpy types. + not isinstance(dtype, supported_dtypes_types) + # And not one of the arrow types. + and not ( + isinstance(dtype, pandas.ArrowDtype) + and any( + dtype.pyarrow_dtype.equals(arrow_type) + for arrow_type in supported_arrow_types + ) + ) + ): raise NotImplementedError( f"DataFrame has a column of dtype '{dtype}' which is not supported with axis=1." f" Supported dtypes are {supported_dtypes_hints}." 
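The hunk above adds `page_size` and `max_results` to `DataFrame.to_pandas_batches`. A minimal usage sketch, assuming a public BigQuery table chosen purely for illustration (the table name and row limits below are not part of this change):

    import bigframes.pandas as bpd

    # Read a table lazily; nothing is downloaded yet.
    df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")

    # Stream the result in pages of up to 10,000 rows, stopping after 50,000 rows.
    total_rows = 0
    for batch in df.to_pandas_batches(page_size=10_000, max_results=50_000):
        # Each batch is an ordinary pandas DataFrame, so normal pandas code applies.
        total_rows += len(batch)

    print(total_rows)
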
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 3df67ed9e4..3b9d5bf141 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -16,25 +16,17 @@ import datetime import decimal -import textwrap import typing -from typing import Any, Dict, Iterable, Literal, Tuple, Union -import warnings +from typing import Any, Dict, Literal, Union import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes -import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops import geopandas as gpd # type: ignore -import google.cloud.bigquery as bigquery import ibis -import ibis.expr.datatypes as ibis_dtypes -from ibis.expr.datatypes.core import dtype as python_type_to_bigquery_type -import ibis.expr.types as ibis_types import numpy as np import pandas as pd import pyarrow as pa import bigframes.constants as constants -import bigframes.exceptions # Type hints for Pandas dtypes supported by BigQuery DataFrame Dtype = Union[ @@ -81,17 +73,6 @@ "binary[pyarrow]", ] -# Type hints for Ibis data types supported by BigQuery DataFrame -IbisDtype = Union[ - ibis_dtypes.Boolean, - ibis_dtypes.Float64, - ibis_dtypes.Int64, - ibis_dtypes.String, - ibis_dtypes.Date, - ibis_dtypes.Time, - ibis_dtypes.Timestamp, -] - BOOL_BIGFRAMES_TYPES = [pd.BooleanDtype()] # Corresponds to the pandas concept of numeric type (such as when 'numeric_only' is specified in an operation) @@ -170,68 +151,23 @@ def is_bool_coercable(type: ExpressionType) -> bool: return (type is None) or is_numeric(type) or is_string_like(type) -BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, Dtype]] = ( - (ibis_dtypes.boolean, pd.BooleanDtype()), - (ibis_dtypes.date, pd.ArrowDtype(pa.date32())), - (ibis_dtypes.float64, pd.Float64Dtype()), - (ibis_dtypes.int64, pd.Int64Dtype()), - (ibis_dtypes.string, pd.StringDtype(storage="pyarrow")), - (ibis_dtypes.time, pd.ArrowDtype(pa.time64("us"))), - (ibis_dtypes.Timestamp(timezone=None), pd.ArrowDtype(pa.timestamp("us"))), - ( - ibis_dtypes.Timestamp(timezone="UTC"), - pd.ArrowDtype(pa.timestamp("us", tz="UTC")), - ), - (ibis_dtypes.binary, pd.ArrowDtype(pa.binary())), - ( - ibis_dtypes.Decimal(precision=38, scale=9, nullable=True), - pd.ArrowDtype(pa.decimal128(38, 9)), - ), - ( - ibis_dtypes.Decimal(precision=76, scale=38, nullable=True), - pd.ArrowDtype(pa.decimal256(76, 38)), - ), - ( - ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True), - gpd.array.GeometryDtype(), - ), -) - -BIGFRAMES_TO_IBIS: Dict[Dtype, ibis_dtypes.DataType] = { - pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS -} - -IBIS_TO_ARROW: Dict[ibis_dtypes.DataType, pa.DataType] = { - ibis_dtypes.boolean: pa.bool_(), - ibis_dtypes.date: pa.date32(), - ibis_dtypes.float64: pa.float64(), - ibis_dtypes.int64: pa.int64(), - ibis_dtypes.string: pa.string(), - ibis_dtypes.time: pa.time64("us"), - ibis_dtypes.Timestamp(timezone=None): pa.timestamp("us"), - ibis_dtypes.Timestamp(timezone="UTC"): pa.timestamp("us", tz="UTC"), - ibis_dtypes.binary: pa.binary(), - ibis_dtypes.Decimal(precision=38, scale=9, nullable=True): pa.decimal128(38, 9), - ibis_dtypes.Decimal(precision=76, scale=38, nullable=True): pa.decimal256(76, 38), -} - -ARROW_TO_IBIS = {arrow: ibis for ibis, arrow in IBIS_TO_ARROW.items()} - -IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, Dtype] = { - ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS -} -# Allow REQUIRED fields to map correctly. 
-IBIS_TO_BIGFRAMES.update( - {ibis.copy(nullable=False): pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS} -) -IBIS_TO_BIGFRAMES.update( - { - # TODO: Interval - } +_ALL_DTYPES = ( + pd.BooleanDtype(), + pd.ArrowDtype(pa.date32()), + pd.Float64Dtype(), + pd.Int64Dtype(), + pd.StringDtype(storage="pyarrow"), + pd.ArrowDtype(pa.time64("us")), + pd.ArrowDtype(pa.timestamp("us")), + pd.ArrowDtype(pa.timestamp("us", tz="UTC")), + pd.ArrowDtype(pa.binary()), + pd.ArrowDtype(pa.decimal128(38, 9)), + pd.ArrowDtype(pa.decimal256(76, 38)), + gpd.array.GeometryDtype(), ) BIGFRAMES_STRING_TO_BIGFRAMES: Dict[DtypeString, Dtype] = { - typing.cast(DtypeString, dtype.name): dtype for dtype in BIGFRAMES_TO_IBIS.keys() + typing.cast(DtypeString, dtype.name): dtype for dtype in _ALL_DTYPES } # special case - string[pyarrow] doesn't include the storage in its name, and both @@ -262,362 +198,27 @@ def dtype_for_etype(etype: ExpressionType) -> Dtype: return etype -def ibis_dtype_to_bigframes_dtype( - ibis_dtype: ibis_dtypes.DataType, -) -> Dtype: - """Converts an Ibis dtype to a BigQuery DataFrames dtype - - Args: - ibis_dtype: The ibis dtype used to represent this type, which - should in turn correspond to an underlying BigQuery type - - Returns: - The supported BigQuery DataFrames dtype, which may be provided by - pandas, numpy, or db_types - - Raises: - ValueError: if passed an unexpected type - """ - # Special cases: Ibis supports variations on these types, but currently - # our IO returns them as objects. Eventually, we should support them as - # ArrowDType (and update the IO accordingly) - if isinstance(ibis_dtype, ibis_dtypes.Array): - return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype)) - - if isinstance(ibis_dtype, ibis_dtypes.Struct): - return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype)) - - # BigQuery only supports integers of size 64 bits. - if isinstance(ibis_dtype, ibis_dtypes.Integer): - return pd.Int64Dtype() - - # Temporary: Will eventually support an explicit json type instead of casting to string. - if isinstance(ibis_dtype, ibis_dtypes.JSON): - warnings.warn( - "Interpreting JSON as string. This behavior may change in future versions.", - bigframes.exceptions.PreviewWarning, - ) - return STRING_DTYPE - - if ibis_dtype in IBIS_TO_BIGFRAMES: - return IBIS_TO_BIGFRAMES[ibis_dtype] - elif isinstance(ibis_dtype, ibis_dtypes.Decimal): - # Temporary workaround for ibis decimal issue (b/323387826) - if ibis_dtype.precision >= 76: - return pd.ArrowDtype(pa.decimal256(76, 38)) - else: - return pd.ArrowDtype(pa.decimal128(38, 9)) - elif isinstance(ibis_dtype, ibis_dtypes.Null): - # Fallback to STRING for NULL values for most flexibility in SQL. - return IBIS_TO_BIGFRAMES[ibis_dtypes.string] - else: - raise ValueError( - f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}" - ) - - -def ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType: - if isinstance(ibis_dtype, ibis_dtypes.Array): - return pa.list_( - ibis_dtype_to_arrow_dtype(ibis_dtype.value_type.copy(nullable=True)) - ) - - if isinstance(ibis_dtype, ibis_dtypes.Struct): - return pa.struct( - [ - (name, ibis_dtype_to_arrow_dtype(dtype)) - for name, dtype in ibis_dtype.fields.items() - ] - ) - - if ibis_dtype in IBIS_TO_ARROW: - return IBIS_TO_ARROW[ibis_dtype] - else: - raise ValueError( - f"Unexpected Ibis data type {ibis_dtype}. 
{constants.FEEDBACK_LINK}" - ) - - -def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value: - """Converts an Ibis expression to canonical type. - - This is useful in cases where multiple types correspond to the same BigFrames dtype. - """ - ibis_type = value.type() - name = value.get_name() - if ibis_type.is_json(): - value = vendored_ibis_ops.ToJsonString(value).to_expr() - return value.name(name) - # Allow REQUIRED fields to be joined with NULLABLE fields. - nullable_type = ibis_type.copy(nullable=True) - return value.cast(nullable_type).name(name) - - -def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType: - if pa.types.is_struct(arrow_dtype): - struct_dtype = typing.cast(pa.StructType, arrow_dtype) - return ibis_dtypes.Struct.from_tuples( - [ - (field.name, arrow_dtype_to_ibis_dtype(field.type)) - for field in struct_dtype - ] - ) - - if arrow_dtype in ARROW_TO_IBIS: - return ARROW_TO_IBIS[arrow_dtype] - if arrow_dtype == pa.null(): - # Used for empty local dataframes where pyarrow has null type - return ibis_dtypes.float64 - else: - raise ValueError( - f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}" - ) - - def arrow_dtype_to_bigframes_dtype(arrow_dtype: pa.DataType) -> Dtype: - return ibis_dtype_to_bigframes_dtype(arrow_dtype_to_ibis_dtype(arrow_dtype)) - - -def bigframes_dtype_to_ibis_dtype( - bigframes_dtype: Union[DtypeString, Dtype, np.dtype[Any]] -) -> ibis_dtypes.DataType: - """Converts a BigQuery DataFrames supported dtype to an Ibis dtype. - - Args: - bigframes_dtype: - A dtype supported by BigQuery DataFrame - - Returns: - IbisDtype: The corresponding Ibis type - - Raises: - ValueError: If passed a dtype not supported by BigQuery DataFrames. - """ - if isinstance(bigframes_dtype, pd.ArrowDtype): - return arrow_dtype_to_ibis_dtype(bigframes_dtype.pyarrow_dtype) - - type_string = str(bigframes_dtype) - if type_string in BIGFRAMES_STRING_TO_BIGFRAMES: - bigframes_dtype = BIGFRAMES_STRING_TO_BIGFRAMES[ - typing.cast(DtypeString, type_string) - ] - else: - raise ValueError( - textwrap.dedent( - f""" - Unexpected data type {bigframes_dtype}. The following - str dtypes are supppted: 'boolean','Float64','Int64', - 'int64[pyarrow]','string','string[pyarrow]', - 'timestamp[us, tz=UTC][pyarrow]','timestamp[us][pyarrow]', - 'date32[day][pyarrow]','time64[us][pyarrow]'. - The following pandas.ExtensionDtype are supported: - pandas.BooleanDtype(), pandas.Float64Dtype(), - pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"), - pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")), - pd.ArrowDtype(pa.timestamp("us")), - pd.ArrowDtype(pa.timestamp("us", tz="UTC")). 
- {constants.FEEDBACK_LINK} - """ - ) - ) + # TODO: Directly convert instead of using ibis dtype as intermediate step + from bigframes.core.compile.ibis_types import ( + _arrow_dtype_to_ibis_dtype, + ibis_dtype_to_bigframes_dtype, + ) - return BIGFRAMES_TO_IBIS[bigframes_dtype] + return ibis_dtype_to_bigframes_dtype(_arrow_dtype_to_ibis_dtype(arrow_dtype)) def bigframes_dtype_to_arrow_dtype( bigframes_dtype: Union[DtypeString, Dtype, np.dtype[Any]] ) -> pa.DataType: - return ibis_dtype_to_arrow_dtype(bigframes_dtype_to_ibis_dtype(bigframes_dtype)) - - -def literal_to_ibis_scalar( - literal, force_dtype: typing.Optional[Dtype] = None, validate: bool = True -): - """Accept any literal and, if possible, return an Ibis Scalar - expression with a BigQuery DataFrames compatible data type - - Args: - literal: - any value accepted by Ibis - force_dtype: - force the value to a specific dtype - validate: - If true, will raise ValueError if type cannot be stored in a - BigQuery DataFrames object. If used as a subexpression, this should - be disabled. - - Returns: - An ibis Scalar supported by BigQuery DataFrame - - Raises: - ValueError: if passed literal cannot be coerced to a - BigQuery DataFrames compatible scalar - """ - # Special case: Can create nulls for non-bidirectional types - if (force_dtype == gpd.array.GeometryDtype()) and pd.isna(literal): - # Ibis has bug for casting nulltype to geospatial, so we perform intermediate cast first - geotype = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True) - return ibis.literal(None, geotype) - ibis_dtype = BIGFRAMES_TO_IBIS[force_dtype] if force_dtype else None - - if pd.api.types.is_list_like(literal): - if validate: - raise ValueError( - f"List types can't be stored in BigQuery DataFrames. {constants.FEEDBACK_LINK}" - ) - # "correct" way would be to use ibis.array, but this produces invalid BQ SQL syntax - return tuple(literal) - if not pd.api.types.is_list_like(literal) and pd.isna(literal): - if ibis_dtype: - return ibis.null().cast(ibis_dtype) - else: - return ibis.null() - - scalar_expr = ibis.literal(literal) - if ibis_dtype: - scalar_expr = ibis.literal(literal, ibis_dtype) - elif scalar_expr.type().is_floating(): - scalar_expr = ibis.literal(literal, ibis_dtypes.float64) - elif scalar_expr.type().is_integer(): - scalar_expr = ibis.literal(literal, ibis_dtypes.int64) - elif scalar_expr.type().is_decimal(): - precision = scalar_expr.type().precision - scale = scalar_expr.type().scale - if (not precision and not scale) or ( - precision and scale and scale <= 9 and precision + (9 - scale) <= 38 - ): - scalar_expr = ibis.literal( - literal, ibis_dtypes.decimal(precision=38, scale=9) - ) - elif precision and scale and scale <= 38 and precision + (38 - scale) <= 76: - scalar_expr = ibis.literal( - literal, ibis_dtypes.decimal(precision=76, scale=38) - ) - else: - raise TypeError( - "BigQuery's decimal data type supports a maximum precision of 76 and a maximum scale of 38." - f"Current precision: {precision}. Current scale: {scale}" - ) - - # TODO(bmil): support other literals that can be coerced to compatible types - if validate and (scalar_expr.type() not in BIGFRAMES_TO_IBIS.values()): - raise ValueError( - f"Literal did not coerce to a supported data type: {scalar_expr.type()}. 
{constants.FEEDBACK_LINK}" - ) - - return scalar_expr - - -def cast_ibis_value( - value: ibis_types.Value, to_type: ibis_dtypes.DataType -) -> ibis_types.Value: - """Perform compatible type casts of ibis values - - Args: - value: - Ibis value, which could be a literal, scalar, or column - - to_type: - The Ibis type to cast to - - Returns: - A new Ibis value of type to_type - - Raises: - TypeError: if the type cast cannot be executed""" - if value.type() == to_type: - return value - # casts that just work - # TODO(bmil): add to this as more casts are verified - good_casts = { - ibis_dtypes.bool: (ibis_dtypes.int64,), - ibis_dtypes.int64: ( - ibis_dtypes.bool, - ibis_dtypes.float64, - ibis_dtypes.string, - ibis_dtypes.Decimal(precision=38, scale=9), - ibis_dtypes.Decimal(precision=76, scale=38), - ibis_dtypes.time, - ibis_dtypes.timestamp, - ibis_dtypes.Timestamp(timezone="UTC"), - ), - ibis_dtypes.float64: ( - ibis_dtypes.string, - ibis_dtypes.int64, - ibis_dtypes.Decimal(precision=38, scale=9), - ibis_dtypes.Decimal(precision=76, scale=38), - ), - ibis_dtypes.string: ( - ibis_dtypes.int64, - ibis_dtypes.float64, - ibis_dtypes.Decimal(precision=38, scale=9), - ibis_dtypes.Decimal(precision=76, scale=38), - ibis_dtypes.binary, - ibis_dtypes.date, - ibis_dtypes.timestamp, - ibis_dtypes.Timestamp(timezone="UTC"), - ), - ibis_dtypes.date: ( - ibis_dtypes.string, - ibis_dtypes.timestamp, - ibis_dtypes.Timestamp(timezone="UTC"), - ), - ibis_dtypes.Decimal(precision=38, scale=9): ( - ibis_dtypes.float64, - ibis_dtypes.Decimal(precision=76, scale=38), - ), - ibis_dtypes.Decimal(precision=76, scale=38): ( - ibis_dtypes.float64, - ibis_dtypes.Decimal(precision=38, scale=9), - ), - ibis_dtypes.time: ( - ibis_dtypes.int64, - ibis_dtypes.string, - ), - ibis_dtypes.timestamp: ( - ibis_dtypes.date, - ibis_dtypes.int64, - ibis_dtypes.string, - ibis_dtypes.time, - ibis_dtypes.Timestamp(timezone="UTC"), - ), - ibis_dtypes.Timestamp(timezone="UTC"): ( - ibis_dtypes.date, - ibis_dtypes.int64, - ibis_dtypes.string, - ibis_dtypes.time, - ibis_dtypes.timestamp, - ), - ibis_dtypes.binary: (ibis_dtypes.string,), - } - - value = ibis_value_to_canonical_type(value) - if value.type() in good_casts: - if to_type in good_casts[value.type()]: - return value.cast(to_type) - else: - # this should never happen - raise TypeError( - f"Unexpected value type {value.type()}. {constants.FEEDBACK_LINK}" - ) - - # casts that need some encouragement - - # BigQuery casts bools to lower case strings. Capitalize the result to match Pandas - # TODO(bmil): remove this workaround after fixing Ibis - if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.string: - return typing.cast(ibis_types.StringValue, value.cast(to_type)).capitalize() - - if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.float64: - return value.cast(ibis_dtypes.int64).cast(ibis_dtypes.float64) - - if value.type() == ibis_dtypes.float64 and to_type == ibis_dtypes.bool: - return value != ibis_types.literal(0) - - raise TypeError( - f"Unsupported cast {value.type()} to {to_type}. 
{constants.FEEDBACK_LINK}" + # TODO: Directly convert instead of using ibis dtype as intermediate step + from bigframes.core.compile.ibis_types import ( + _ibis_dtype_to_arrow_dtype, + bigframes_dtype_to_ibis_dtype, ) + return _ibis_dtype_to_arrow_dtype(bigframes_dtype_to_ibis_dtype(bigframes_dtype)) + def is_dtype(scalar: typing.Any, dtype: Dtype) -> bool: """Captures whether a scalar can be losslessly represented by a dtype.""" @@ -740,6 +341,11 @@ def infer_literal_type(literal) -> typing.Optional[Dtype]: if pd.isna(literal): return None # Null value without a definite type # Temporary logic, use ibis inferred type + from bigframes.core.compile.ibis_types import ( + ibis_dtype_to_bigframes_dtype, + literal_to_ibis_scalar, + ) + ibis_literal = literal_to_ibis_scalar(literal) return ibis_dtype_to_bigframes_dtype(ibis_literal.type()) @@ -748,49 +354,44 @@ def infer_literal_arrow_type(literal) -> typing.Optional[pa.DataType]: if pd.isna(literal): return None # Null value without a definite type # Temporary logic, use ibis inferred type + # TODO: Directly convert instead of using ibis dtype as intermediate step + from bigframes.core.compile.ibis_types import ( + _ibis_dtype_to_arrow_dtype, + literal_to_ibis_scalar, + ) + ibis_literal = literal_to_ibis_scalar(literal) - return ibis_dtype_to_arrow_dtype(ibis_literal.type()) + return _ibis_dtype_to_arrow_dtype(ibis_literal.type()) + + +def bf_type_from_type_kind(bf_schema) -> Dict[str, Dtype]: + """Converts bigquery sql type to the default bigframes dtype.""" + ibis_schema: ibis.Schema = third_party_ibis_bqtypes.BigQuerySchema.to_ibis( + bf_schema + ) + # TODO: Directly convert instead of using ibis dtype as intermediate step + from bigframes.core.compile.ibis_types import ibis_dtype_to_bigframes_dtype + return { + name: ibis_dtype_to_bigframes_dtype(type) for name, type in ibis_schema.items() + } + + +# Remote functions use only +# TODO: Refactor into remote function module # Input and output types supported by BigQuery DataFrames remote functions. # TODO(shobs): Extend the support to all types supported by BQ remote functions # https://cloud.google.com/bigquery/docs/remote-functions#limitations -SUPPORTED_IO_PYTHON_TYPES = {bool, float, int, str} -SUPPORTED_IO_BIGQUERY_TYPEKINDS = { +RF_SUPPORTED_IO_PYTHON_TYPES = {bool, bytes, float, int, str} + +RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS = { "BOOLEAN", "BOOL", + "BYTES", "FLOAT", "FLOAT64", "INT64", "INTEGER", "STRING", } - - -class UnsupportedTypeError(ValueError): - def __init__(self, type_, supported_types): - self.type = type_ - self.supported_types = supported_types - - -def ibis_type_from_python_type(t: type) -> ibis_dtypes.DataType: - if t not in SUPPORTED_IO_PYTHON_TYPES: - raise UnsupportedTypeError(t, SUPPORTED_IO_PYTHON_TYPES) - return python_type_to_bigquery_type(t) - - -def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType: - """Convert bq type to ibis. 
Only to be used for remote functions, does not handle all types.""" - if tk not in SUPPORTED_IO_BIGQUERY_TYPEKINDS: - raise UnsupportedTypeError(tk, SUPPORTED_IO_BIGQUERY_TYPEKINDS) - return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) - - -def bf_type_from_type_kind(bf_schema) -> Dict[str, Dtype]: - """Converts bigquery sql type to the default bigframes dtype.""" - ibis_schema: ibis.Schema = third_party_ibis_bqtypes.BigQuerySchema.to_ibis( - bf_schema - ) - return { - name: ibis_dtype_to_bigframes_dtype(type) for name, type in ibis_schema.items() - } diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 7be252406c..472ac07547 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -24,7 +24,6 @@ import string import sys import tempfile -import textwrap from typing import ( Any, cast, @@ -33,6 +32,7 @@ NamedTuple, Optional, Sequence, + Tuple, TYPE_CHECKING, Union, ) @@ -40,6 +40,7 @@ import ibis import pandas +import pyarrow import requests if TYPE_CHECKING: @@ -60,7 +61,8 @@ from bigframes import clients import bigframes.constants as constants -import bigframes.dtypes +import bigframes.core.compile.ibis_types +import bigframes.functions.remote_function_template logger = logging.getLogger(__name__) @@ -182,15 +184,11 @@ def create_bq_remote_function( # Create BQ function # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2 bq_function_args = [] - bq_function_return_type = third_party_ibis_bqtypes.BigQueryType.from_ibis( - output_type - ) + bq_function_return_type = output_type # We are expecting the input type annotations to be 1:1 with the input args - for idx, name in enumerate(input_args): - bq_function_args.append( - f"{name} {third_party_ibis_bqtypes.BigQueryType.from_ibis(input_types[idx])}" - ) + for name, type_ in zip(input_args, input_types): + bq_function_args.append(f"{name} {type_}") remote_function_options = { "endpoint": endpoint, @@ -258,173 +256,24 @@ def get_cloud_function_endpoint(self, name): pass return None - def generate_udf_code(self, def_, dir): - """Generate serialized bytecode using cloudpickle given a udf.""" - udf_code_file_name = "udf.py" - udf_bytecode_file_name = "udf.cloudpickle" - - # original code, only for debugging purpose - udf_code = textwrap.dedent(inspect.getsource(def_)) - udf_code_file_path = os.path.join(dir, udf_code_file_name) - with open(udf_code_file_path, "w") as f: - f.write(udf_code) - - # serialized bytecode - udf_bytecode_file_path = os.path.join(dir, udf_bytecode_file_name) - with open(udf_bytecode_file_path, "wb") as f: - cloudpickle.dump(def_, f, protocol=_pickle_protocol_version) - - return udf_code_file_name, udf_bytecode_file_name - - def generate_cloud_function_main_code(self, def_, dir, is_row_processor=False): - """Get main.py code for the cloud function for the given user defined function.""" - - # Pickle the udf with all its dependencies - udf_code_file, udf_bytecode_file = self.generate_udf_code(def_, dir) - handler_func_name = "udf_http" - - # We want to build a cloud function that works for BQ remote functions, - # where we receive `calls` in json which is a batch of rows from BQ SQL. - # The number and the order of values in each row is expected to exactly - # match to the number and order of arguments in the udf , e.g. if the udf is - # def foo(x: int, y: str): - # ... - # then the http request body could look like - # { - # ... 
- # "calls" : [ - # [123, "hello"], - # [456, "world"] - # ] - # ... - # } - # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#input_format - code = """\ -import cloudpickle -import functions_framework -from flask import jsonify -import json -""" - if is_row_processor: - code += """\ -import ast -import math -import pandas as pd - -def get_pd_series(row): - row_json = json.loads(row) - col_names = row_json["names"] - col_types = row_json["types"] - col_values = row_json["values"] - index_length = row_json["indexlength"] - dtype = row_json["dtype"] - - # At this point we are assuming that col_names, col_types and col_values are - # arrays of the same length, representing column names, types and values for - # one row of data - - # column names are not necessarily strings - # they are serialized as repr(name) at source - evaluated_col_names = [] - for col_name in col_names: - try: - col_name = ast.literal_eval(col_name) - except Exception as ex: - raise NameError(f"Failed to evaluate column name from '{col_name}': {ex}") - evaluated_col_names.append(col_name) - col_names = evaluated_col_names - - # Supported converters for pandas to python types - value_converters = { - "boolean": lambda val: val == "true", - "Int64": int, - "Float64": float, - "string": str, - } - - def convert_value(value, value_type): - value_converter = value_converters.get(value_type) - if value_converter is None: - raise ValueError(f"Don't know how to handle type '{value_type}'") - if value is None: - return None - return value_converter(value) - - index_values = [ - pd.Series([convert_value(col_values[i], col_types[i])], dtype=col_types[i])[0] - for i in range(index_length) - ] - - data_col_names = col_names[index_length:] - data_col_types = col_types[index_length:] - data_col_values = col_values[index_length:] - data_col_values = [ - pd.Series([convert_value(a, data_col_types[i])], dtype=data_col_types[i])[0] - for i, a in enumerate(data_col_values) - ] - - row_index = index_values[0] if len(index_values) == 1 else tuple(index_values) - row_series = pd.Series(data_col_values, index=data_col_names, name=row_index, dtype=dtype) - return row_series -""" - code += f"""\ - -# original udf code is in {udf_code_file} -# serialized udf code is in {udf_bytecode_file} -with open("{udf_bytecode_file}", "rb") as f: - udf = cloudpickle.load(f) - -def {handler_func_name}(request): - try: - request_json = request.get_json(silent=True) - calls = request_json["calls"] - replies = [] - for call in calls: -""" - - if is_row_processor: - code += """\ - reply = udf(get_pd_series(call[0])) - if isinstance(reply, float) and (math.isnan(reply) or math.isinf(reply)): - # json serialization of the special float values (nan, inf, -inf) - # is not in strict compliance of the JSON specification - # https://docs.python.org/3/library/json.html#basic-usage. 
- # Let's convert them to a quoted string representation ("NaN", - # "Infinity", "-Infinity" respectively) which is handled by - # BigQuery - reply = json.dumps(reply) - elif pd.isna(reply): - # Pandas N/A values are not json serializable, so use a python - # equivalent instead - reply = None - elif hasattr(reply, "item"): - # Numpy types are not json serializable, so use its Python - # value instead - reply = reply.item() -""" - else: - code += """\ - reply = udf(*call) -""" - code += """\ - replies.append(reply) - return_json = json.dumps({"replies" : replies}) - return return_json - except Exception as e: - return jsonify( { "errorMessage": str(e) } ), 400 -""" - - main_py = os.path.join(dir, "main.py") - with open(main_py, "w") as f: - f.write(code) - logger.debug(f"Wrote {os.path.abspath(main_py)}:\n{open(main_py).read()}") - - return handler_func_name - def generate_cloud_function_code( - self, def_, dir, package_requirements=None, is_row_processor=False + self, + def_, + directory, + *, + input_types: Tuple[str], + output_type: str, + package_requirements=None, + is_row_processor=False, ): - """Generate the cloud function code for a given user defined function.""" + """Generate the cloud function code for a given user defined function. + + Args: + input_types (tuple[str]): + Types of the input arguments in BigQuery SQL data type names. + output_type (str): + Types of the output scalar as a BigQuery SQL data type name. + """ # requirements.txt requirements = ["cloudpickle >= 2.1.0"] @@ -432,16 +281,21 @@ def generate_cloud_function_code( # bigframes remote function will send an entire row of data as json, # which would be converted to a pandas series and processed requirements.append(f"pandas=={pandas.__version__}") + requirements.append(f"pyarrow=={pyarrow.__version__}") if package_requirements: requirements.extend(package_requirements) requirements = sorted(requirements) - requirements_txt = os.path.join(dir, "requirements.txt") + requirements_txt = os.path.join(directory, "requirements.txt") with open(requirements_txt, "w") as f: f.write("\n".join(requirements)) # main.py - entry_point = self.generate_cloud_function_main_code( - def_, dir, is_row_processor + entry_point = bigframes.functions.remote_function_template.generate_cloud_function_main_code( + def_, + directory, + input_types=input_types, + output_type=output_type, + is_row_processor=is_row_processor, ) return entry_point @@ -449,20 +303,35 @@ def create_cloud_function( self, def_, cf_name, + *, + input_types: Tuple[str], + output_type: str, package_requirements=None, timeout_seconds=600, max_instance_count=None, is_row_processor=False, vpc_connector=None, ): - """Create a cloud function from the given user defined function.""" + """Create a cloud function from the given user defined function. + + Args: + input_types (tuple[str]): + Types of the input arguments in BigQuery SQL data type names. + output_type (str): + Types of the output scalar as a BigQuery SQL data type name. 
+ """ # Build and deploy folder structure containing cloud function - with tempfile.TemporaryDirectory() as dir: + with tempfile.TemporaryDirectory() as directory: entry_point = self.generate_cloud_function_code( - def_, dir, package_requirements, is_row_processor + def_, + directory, + package_requirements=package_requirements, + input_types=input_types, + output_type=output_type, + is_row_processor=is_row_processor, ) - archive_path = shutil.make_archive(dir, "zip", dir) + archive_path = shutil.make_archive(directory, "zip", directory) # We are creating cloud function source code from the currently running # python version. Use the same version to deploy. This is necessary @@ -607,11 +476,13 @@ def provision_bq_remote_function( cf_endpoint = self.create_cloud_function( def_, cloud_function_name, - package_requirements, - cloud_function_timeout, - cloud_function_max_instance_count, - is_row_processor, - cloud_function_vpc_connector, + input_types=input_types, + output_type=output_type, + package_requirements=package_requirements, + timeout_seconds=cloud_function_timeout, + max_instance_count=cloud_function_max_instance_count, + is_row_processor=is_row_processor, + vpc_connector=cloud_function_vpc_connector, ) else: logger.info(f"Cloud function {cloud_function_name} already exists.") @@ -686,12 +557,16 @@ def ibis_signature_from_python_signature( input_types: Sequence[type], output_type: type, ) -> IbisSignature: + return IbisSignature( parameter_names=list(signature.parameters.keys()), input_types=[ - bigframes.dtypes.ibis_type_from_python_type(t) for t in input_types + bigframes.core.compile.ibis_types.ibis_type_from_python_type(t) + for t in input_types ], - output_type=bigframes.dtypes.ibis_type_from_python_type(output_type), + output_type=bigframes.core.compile.ibis_types.ibis_type_from_python_type( + output_type + ), ) @@ -699,6 +574,7 @@ class ReturnTypeMissingError(ValueError): pass +# TODO: Move this to compile folder def ibis_signature_from_routine(routine: bigquery.Routine) -> IbisSignature: if not routine.return_type: raise ReturnTypeMissingError @@ -706,12 +582,14 @@ def ibis_signature_from_routine(routine: bigquery.Routine) -> IbisSignature: return IbisSignature( parameter_names=[arg.name for arg in routine.arguments], input_types=[ - bigframes.dtypes.ibis_type_from_type_kind(arg.data_type.type_kind) + bigframes.core.compile.ibis_types.ibis_type_from_type_kind( + arg.data_type.type_kind + ) if arg.data_type else None for arg in routine.arguments ], - output_type=bigframes.dtypes.ibis_type_from_type_kind( + output_type=bigframes.core.compile.ibis_types.ibis_type_from_type_kind( routine.return_type.type_kind ), ) @@ -920,8 +798,9 @@ def remote_function( https://cloud.google.com/functions/docs/networking/connecting-vpc. """ # Some defaults may be used from the session if not provided otherwise + import bigframes.exceptions as bf_exceptions import bigframes.pandas as bpd - import bigframes.series + import bigframes.series as bf_series import bigframes.session session = cast(bigframes.session.Session, session or bpd.get_global_session()) @@ -1059,13 +938,13 @@ def wrapper(func): # BigQuery DataFrames and pandas object types for compatibility. 
is_row_processor = False if len(input_types) == 1 and ( - (input_type := input_types[0]) == bigframes.series.Series + (input_type := input_types[0]) == bf_series.Series or input_type == pandas.Series ): warnings.warn( "input_types=Series is in preview.", stacklevel=1, - category=bigframes.exceptions.PreviewWarning, + category=bf_exceptions.PreviewWarning, ) # we will model the row as a json serialized string containing the data @@ -1112,16 +991,21 @@ def try_delattr(attr): rf_name, cf_name = remote_function_client.provision_bq_remote_function( func, - ibis_signature.input_types, - ibis_signature.output_type, - reuse, - name, - packages, - max_batching_rows, - cloud_function_timeout, - cloud_function_max_instances, - is_row_processor, - cloud_function_vpc_connector, + input_types=tuple( + third_party_ibis_bqtypes.BigQueryType.from_ibis(type_) + for type_ in ibis_signature.input_types + ), + output_type=third_party_ibis_bqtypes.BigQueryType.from_ibis( + ibis_signature.output_type + ), + reuse=reuse, + name=name, + package_requirements=packages, + max_batching_rows=max_batching_rows, + cloud_function_timeout=cloud_function_timeout, + cloud_function_max_instance_count=cloud_function_max_instances, + is_row_processor=is_row_processor, + cloud_function_vpc_connector=cloud_function_vpc_connector, ) # TODO: Move ibis logic to compiler step @@ -1135,8 +1019,11 @@ def try_delattr(attr): remote_function_client.get_cloud_function_fully_qualified_name(cf_name) ) func.bigframes_remote_function = str(dataset_ref.routine(rf_name)) # type: ignore - func.output_dtype = bigframes.dtypes.ibis_dtype_to_bigframes_dtype( - ibis_signature.output_type + + func.output_dtype = ( + bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( + ibis_signature.output_type + ) ) func.ibis_node = node return func @@ -1146,21 +1033,14 @@ def try_delattr(attr): def read_gbq_function( function_name: str, - session: Optional[Session] = None, - bigquery_client: Optional[bigquery.Client] = None, + *, + session: Session, ): """ Read an existing BigQuery function and prepare it for use in future queries. """ - - # A BigQuery client is required to perform BQ operations - if not bigquery_client and session: - bigquery_client = session.bqclient - if not bigquery_client: - raise ValueError( - "A bigquery client must be provided, either directly or via session. " - f"{constants.FEEDBACK_LINK}" - ) + bigquery_client = session.bqclient + ibis_client = session.ibis_client try: routine_ref = get_routine_reference(function_name, bigquery_client, session) @@ -1182,7 +1062,7 @@ def read_gbq_function( raise ValueError( f"Function return type must be specified. {constants.FEEDBACK_LINK}" ) - except bigframes.dtypes.UnsupportedTypeError as e: + except bigframes.core.compile.ibis_types.UnsupportedTypeError as e: raise ValueError( f"Type {e.type} not supported, supported types are {e.supported_types}. " f"{constants.FEEDBACK_LINK}" @@ -1192,10 +1072,13 @@ def read_gbq_function( # non-standard names for the arguments here. def func(*ignored_args, **ignored_kwargs): f"""Remote function {str(routine_ref)}.""" - # TODO(swast): Construct an ibis client from bigquery_client and - # execute node via a query. 
+ nonlocal node # type: ignore + + expr = node(*ignored_args, **ignored_kwargs) # type: ignore + return ibis_client.execute(expr) # TODO: Move ibis logic to compiler step + func.__name__ = routine_ref.routine_id node = ibis.udf.scalar.builtin( @@ -1205,7 +1088,7 @@ def func(*ignored_args, **ignored_kwargs): signature=(ibis_signature.input_types, ibis_signature.output_type), ) func.bigframes_remote_function = str(routine_ref) # type: ignore - func.output_dtype = bigframes.dtypes.ibis_dtype_to_bigframes_dtype( # type: ignore + func.output_dtype = bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( # type: ignore ibis_signature.output_type ) func.ibis_node = node # type: ignore diff --git a/bigframes/functions/remote_function_template.py b/bigframes/functions/remote_function_template.py new file mode 100644 index 0000000000..68fe1b917d --- /dev/null +++ b/bigframes/functions/remote_function_template.py @@ -0,0 +1,289 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import inspect +import logging +import os +import textwrap +from typing import Tuple + +import cloudpickle + +logger = logging.getLogger(__name__) + + +# Protocol version 4 is available in python version 3.4 and above +# https://docs.python.org/3/library/pickle.html#data-stream-format +_pickle_protocol_version = 4 + + +# Placeholder variables for testing. +input_types = ("STRING",) +output_type = "STRING" + + +# Convert inputs to BigQuery JSON. See: +# https://cloud.google.com/bigquery/docs/remote-functions#json_encoding_of_sql_data_type +# and +# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#to_json_string +def convert_call(input_types, call): + for type_, arg in zip(input_types, call): + yield convert_from_bq_json(type_, arg) + + +def convert_from_bq_json(type_, arg): + import base64 + import collections + + converters = collections.defaultdict(lambda: (lambda value: value)) # type: ignore + converters["BYTES"] = base64.b64decode + converter = converters[type_] + return converter(arg) if arg is not None else None + + +def convert_to_bq_json(type_, arg): + import base64 + import collections + + converters = collections.defaultdict(lambda: (lambda value: value)) # type: ignore + converters["BYTES"] = lambda value: base64.b64encode(value).decode("utf-8") + converter = converters[type_] + return converter(arg) if arg is not None else None + + +# get_pd_series is the inverse of Block._get_rows_as_json_values +# NOTE: Keep in sync with the list of supported types in DataFrame.apply. 
+def get_pd_series(row): + import ast + import base64 + import json + from typing import Callable, cast + + import pandas as pd + + row_json = json.loads(row) + col_names = row_json["names"] + col_types = row_json["types"] + col_values = row_json["values"] + index_length = row_json["indexlength"] + dtype = row_json["dtype"] + + # At this point we are assuming that col_names, col_types and col_values are + # arrays of the same length, representing column names, types and values for + # one row of data + + # column names are not necessarily strings + # they are serialized as repr(name) at source + evaluated_col_names = [] + for col_name in col_names: + try: + col_name = ast.literal_eval(col_name) + except Exception as ex: + raise NameError(f"Failed to evaluate column name from '{col_name}': {ex}") + evaluated_col_names.append(col_name) + col_names = evaluated_col_names + + # Supported converters for pandas to python types + value_converters = { + "boolean": lambda val: val == "true", + "Int64": int, + "Float64": float, + "string": str, + "binary[pyarrow]": base64.b64decode, + } + + def convert_value(value, value_type): + value_converter = cast(Callable, value_converters.get(value_type)) + if value_converter is None: + raise ValueError(f"Don't know how to handle type '{value_type}'") + if value is None: + return None + return value_converter(value) + + index_values = [ + pd.Series([convert_value(col_values[i], col_types[i])], dtype=col_types[i])[0] + for i in range(index_length) + ] + + data_col_names = col_names[index_length:] + data_col_types = col_types[index_length:] + data_col_values = col_values[index_length:] + data_col_values = [ + pd.Series([convert_value(a, data_col_types[i])], dtype=data_col_types[i])[0] + for i, a in enumerate(data_col_values) + ] + + row_index = index_values[0] if len(index_values) == 1 else tuple(index_values) + row_series = pd.Series( + data_col_values, index=data_col_names, name=row_index, dtype=dtype + ) + return row_series + + +def udf(*args): + """Dummy function to use as a placeholder for function code in templates.""" + pass + + +# We want to build a cloud function that works for BQ remote functions, +# where we receive `calls` in json which is a batch of rows from BQ SQL. +# The number and the order of values in each row is expected to exactly +# match to the number and order of arguments in the udf , e.g. if the udf is +# def foo(x: int, y: str): +# ... +# then the http request body could look like +# { +# ... +# "calls" : [ +# [123, "hello"], +# [456, "world"] +# ] +# ... 
+# } +# https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#input_format +def udf_http(request): + global input_types, output_type + import json + import traceback + + from flask import jsonify + + try: + request_json = request.get_json(silent=True) + calls = request_json["calls"] + replies = [] + for call in calls: + reply = convert_to_bq_json( + output_type, udf(*convert_call(input_types, call)) + ) + replies.append(reply) + return_json = json.dumps({"replies": replies}) + return return_json + except Exception: + return jsonify({"errorMessage": traceback.format_exc()}), 400 + + +def udf_http_row_processor(request): + global output_type + import json + import math + import traceback + + from flask import jsonify + import pandas as pd + + try: + request_json = request.get_json(silent=True) + calls = request_json["calls"] + replies = [] + for call in calls: + reply = convert_to_bq_json(output_type, udf(get_pd_series(call[0]))) + if isinstance(reply, float) and (math.isnan(reply) or math.isinf(reply)): + # json serialization of the special float values (nan, inf, -inf) + # is not in strict compliance of the JSON specification + # https://docs.python.org/3/library/json.html#basic-usage. + # Let's convert them to a quoted string representation ("NaN", + # "Infinity", "-Infinity" respectively) which is handled by + # BigQuery + reply = json.dumps(reply) + elif pd.isna(reply): + # Pandas N/A values are not json serializable, so use a python + # equivalent instead + reply = None + elif hasattr(reply, "item"): + # Numpy types are not json serializable, so use its Python + # value instead + reply = reply.item() + replies.append(reply) + return_json = json.dumps({"replies": replies}) + return return_json + except Exception: + return jsonify({"errorMessage": traceback.format_exc()}), 400 + + +def generate_udf_code(def_, directory): + """Generate serialized bytecode using cloudpickle given a udf.""" + udf_code_file_name = "udf.py" + udf_bytecode_file_name = "udf.cloudpickle" + + # original code, only for debugging purpose + udf_code = textwrap.dedent(inspect.getsource(def_)) + udf_code_file_path = os.path.join(directory, udf_code_file_name) + with open(udf_code_file_path, "w") as f: + f.write(udf_code) + + # serialized bytecode + udf_bytecode_file_path = os.path.join(directory, udf_bytecode_file_name) + # TODO(b/345433300): try io.BytesIO to avoid writing to the file system + with open(udf_bytecode_file_path, "wb") as f: + cloudpickle.dump(def_, f, protocol=_pickle_protocol_version) + + return udf_code_file_name, udf_bytecode_file_name + + +def generate_cloud_function_main_code( + def_, + directory, + *, + input_types: Tuple[str], + output_type: str, + is_row_processor=False, +): + """Get main.py code for the cloud function for the given user defined function. + + Args: + input_types (tuple[str]): + Types of the input arguments in BigQuery SQL data type names. + output_type (str): + Types of the output scalar as a BigQuery SQL data type name. + """ + + # Pickle the udf with all its dependencies + udf_code_file, udf_bytecode_file = generate_udf_code(def_, directory) + + code_blocks = [ + f"""\ +import cloudpickle + +# original udf code is in {udf_code_file} +# serialized udf code is in {udf_bytecode_file} +with open("{udf_bytecode_file}", "rb") as f: + udf = cloudpickle.load(f) + +input_types = {repr(input_types)} +output_type = {repr(output_type)} +""" + ] + + # For converting scalar outputs to the correct type. 
+ code_blocks.append(inspect.getsource(convert_to_bq_json)) + + if is_row_processor: + code_blocks.append(inspect.getsource(get_pd_series)) + handler_func_name = "udf_http_row_processor" + code_blocks.append(inspect.getsource(udf_http_row_processor)) + else: + code_blocks.append(inspect.getsource(convert_call)) + code_blocks.append(inspect.getsource(convert_from_bq_json)) + handler_func_name = "udf_http" + code_blocks.append(inspect.getsource(udf_http)) + + main_py = os.path.join(directory, "main.py") + with open(main_py, "w") as f: + f.writelines(code_blocks) + logger.debug(f"Wrote {os.path.abspath(main_py)}:\n{open(main_py).read()}") + + return handler_func_name diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index ad0bce481f..41dea7617f 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -190,5 +190,5 @@ def score( if not self._bqml_model: raise RuntimeError("A model must be fitted before score") - # TODO(b/291973741): X param is ignored. Update BQML supports input in ML.EVALUTE. + # TODO(b/291973741): X param is ignored. Update BQML supports input in ML.EVALUATE. return self._bqml_model.evaluate() diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 8fc1e22146..0194d768b8 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -460,7 +460,7 @@ def score( .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#regression_models for the outputs relevant to this model type. @@ -616,7 +616,7 @@ def score( .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#classification_models for the outputs relevant to this model type. diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index a87df61801..a1ae8435d5 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -313,7 +313,7 @@ def score( .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#time_series_models for the outputs relevant to this model type. diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 32168e9a34..0816ef9b24 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -195,7 +195,7 @@ def __init__( self, *, optimize_strategy: Literal[ - "auto_strategy", "batch_gradient_descent", "normal_equation" + "auto_strategy", "batch_gradient_descent" ] = "auto_strategy", fit_intercept: bool = True, l1_reg: Optional[float] = None, diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 7fa0e236eb..2517178d89 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -329,7 +329,7 @@ def score( .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#remote-model-llm for the outputs relevant to this model type. 
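The remote_function_template module above pickles the user-defined function with cloudpickle and reloads it from the generated main.py before serving requests. A minimal, self-contained sketch of that serialize/reload cycle; the add_one UDF and the file name here are illustrative only, not part of the change:

import cloudpickle

def add_one(x: int) -> int:
    # Stand-in for a user-defined function captured by the template.
    return x + 1

# generate_udf_code writes the pickled UDF next to the cloud function source.
with open("udf.cloudpickle", "wb") as f:
    cloudpickle.dump(add_one, f, protocol=4)

# The generated main.py later restores it and invokes it once per row in the
# incoming "calls" payload.
with open("udf.cloudpickle", "rb") as f:
    udf = cloudpickle.load(f)

assert udf(41) == 42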
@@ -571,6 +571,8 @@ class GeminiTextGenerator(base.BaseEstimator): Connection to connect with remote service. str of the format ... If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach permission if the connection isn't fully set up. + max_iterations (Optional[int], Default to 300): + The number of steps to run when performing supervised tuning. """ def __init__( @@ -581,9 +583,11 @@ def __init__( ] = "gemini-pro", session: Optional[bigframes.Session] = None, connection_name: Optional[str] = None, + max_iterations: int = 300, ): self.model_name = model_name self.session = session or bpd.get_global_session() + self.max_iterations = max_iterations self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection @@ -647,6 +651,55 @@ def _from_bq( model._bqml_model = core.BqmlModel(session, bq_model) return model + @property + def _bqml_options(self) -> dict: + """The model options as they will be set for BQML""" + options = { + "max_iterations": self.max_iterations, + "data_split_method": "NO_SPLIT", + } + return options + + def fit( + self, + X: Union[bpd.DataFrame, bpd.Series], + y: Union[bpd.DataFrame, bpd.Series], + ) -> GeminiTextGenerator: + """Fine tune GeminiTextGenerator model. Only support "gemini-pro" model for now. + + .. note:: + + This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (bigframes.dataframe.DataFrame or bigframes.series.Series: + Training labels. + + Returns: + GeminiTextGenerator: Fitted estimator. + """ + if self._bqml_model.model_name.startswith("gemini-1.5"): + raise NotImplementedError("Fit is not supported for gemini-1.5 model.") + + X, y = utils.convert_to_dataframe(X, y) + + options = self._bqml_options + options["endpoint"] = "gemini-1.0-pro-002" + options["prompt_col"] = X.columns.tolist()[0] + + self._bqml_model = self._bqml_model_factory.create_llm_remote_model( + X, + y, + options=options, + connection_name=self.connection_name, + ) + return self + def predict( self, X: Union[bpd.DataFrame, bpd.Series], @@ -732,6 +785,67 @@ def predict( return df + def score( + self, + X: Union[bpd.DataFrame, bpd.Series], + y: Union[bpd.DataFrame, bpd.Series], + task_type: Literal[ + "text_generation", "classification", "summarization", "question_answering" + ] = "text_generation", + ) -> bpd.DataFrame: + """Calculate evaluation metrics of the model. Only "gemini-pro" model is supported for now. + + .. note:: + + This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + + .. 
note:: + + Output matches that of the BigQuery ML.EVALUATE function. + See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#remote-model-llm + for the outputs relevant to this model type. + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): + A BigQuery DataFrame as evaluation data, which contains only one column of input_text + that contains the prompt text to use when evaluating the model. + y (bigframes.dataframe.DataFrame or bigframes.series.Series): + A BigQuery DataFrame as evaluation labels, which contains only one column of output_text + that you would expect to be returned by the model. + task_type (str): + The type of the task for LLM model. Default to "text_generation". + Possible values: "text_generation", "classification", "summarization", and "question_answering". + + Returns: + bigframes.dataframe.DataFrame: The DataFrame as evaluation result. + """ + if not self._bqml_model: + raise RuntimeError("A model must be fitted before score") + + # TODO(ashleyxu): Support gemini-1.5 when the rollout is ready. b/344891364. + if self._bqml_model.model_name.startswith("gemini-1.5"): + raise NotImplementedError("Score is not supported for gemini-1.5 model.") + + X, y = utils.convert_to_dataframe(X, y) + + if len(X.columns) != 1 or len(y.columns) != 1: + raise ValueError( + f"Only support one column as input for X and y. {constants.FEEDBACK_LINK}" + ) + + # BQML identified the column by name + X_col_label = cast(blocks.Label, X.columns[0]) + y_col_label = cast(blocks.Label, y.columns[0]) + X = X.rename(columns={X_col_label: "input_text"}) + y = y.rename(columns={y_col_label: "output_text"}) + + input_data = X.join(y, how="outer") + + return self._bqml_model.llm_evaluate(input_data, task_type) + def to_gbq(self, model_name: str, replace: bool = False) -> GeminiTextGenerator: """Save the model to BigQuery. diff --git a/bigframes/ml/loader.py b/bigframes/ml/loader.py index 66f207929a..515fb50c6f 100644 --- a/bigframes/ml/loader.py +++ b/bigframes/ml/loader.py @@ -98,13 +98,7 @@ def from_bq( Returns: A BigQuery DataFrames ML model object. """ - # TODO(garrettwu): the entire condition only to TRANSFORM_ONLY when b/331679273 is fixed. 
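A hedged usage sketch for the GeminiTextGenerator fine-tuning and evaluation surface added above; the project, table, and column names are placeholders, and running it assumes a configured BigQuery DataFrames session with the necessary BigQuery/Vertex AI connection:

import bigframes.pandas as bpd
from bigframes.ml import llm

# Placeholder training table with string "prompt" and "label" columns.
df = bpd.read_gbq("my-project.my_dataset.sentiment_train")

model = llm.GeminiTextGenerator(model_name="gemini-pro", max_iterations=1)
model.fit(X=df[["prompt"]], y=df[["label"]])

# ML.EVALUATE-backed metrics for the tuned model.
scores = model.score(
    X=df[["prompt"]],
    y=df[["label"]],
    task_type="classification",
).to_pandas()
print(scores)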
- if ( - bq_model.model_type == "TRANSFORM_ONLY" - or bq_model.model_type == "MODEL_TYPE_UNSPECIFIED" - and "transformColumns" in bq_model._properties - and not _is_bq_model_remote(bq_model) - ): + if bq_model.model_type == "TRANSFORM_ONLY": return _transformer_from_bq(session, bq_model) if _is_bq_model_pipeline(bq_model): diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py index 2525ecd34f..a40c175000 100644 --- a/bigframes/ml/metrics/_metrics.py +++ b/bigframes/ml/metrics/_metrics.py @@ -227,7 +227,7 @@ def recall_score( y_true: Union[bpd.DataFrame, bpd.Series], y_pred: Union[bpd.DataFrame, bpd.Series], *, - average: str = "binary", + average: typing.Optional[str] = "binary", ) -> pd.Series: # TODO(ashleyxu): support more average type, default to "binary" if average is not None: @@ -264,7 +264,7 @@ def precision_score( y_true: Union[bpd.DataFrame, bpd.Series], y_pred: Union[bpd.DataFrame, bpd.Series], *, - average: str = "binary", + average: typing.Optional[str] = "binary", ) -> pd.Series: # TODO(ashleyxu): support more average type, default to "binary" if average is not None: @@ -303,7 +303,7 @@ def f1_score( y_true: Union[bpd.DataFrame, bpd.Series], y_pred: Union[bpd.DataFrame, bpd.Series], *, - average: str = "binary", + average: typing.Optional[str] = "binary", ) -> pd.Series: # TODO(ashleyxu): support more average type, default to "binary" y_true_series, y_pred_series = utils.convert_to_series(y_true, y_pred) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 42f83913ee..c10b743631 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -161,7 +161,10 @@ def _convert_expr_input( def create_unary_op(name: str, type_signature: op_typing.UnaryTypeSignature) -> UnaryOp: return dataclasses.make_dataclass( name, - [("name", typing.ClassVar[str], name), ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method)], # type: ignore + [ + ("name", typing.ClassVar[str], name), + ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method), + ], bases=(UnaryOp,), frozen=True, )() @@ -172,7 +175,10 @@ def create_binary_op( ) -> BinaryOp: return dataclasses.make_dataclass( name, - [("name", typing.ClassVar[str], name), ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method)], # type: ignore + [ + ("name", typing.ClassVar[str], name), + ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method), + ], bases=(BinaryOp,), frozen=True, )() @@ -493,8 +499,9 @@ def output_type(self, *input_types): if self.to_type == pa.string(): return dtypes.STRING_DTYPE if isinstance(self.to_type, str): - # TODO(b/340895446): fix type error - return dtypes.BIGFRAMES_STRING_TO_BIGFRAMES[self.to_type] # type: ignore + return dtypes.BIGFRAMES_STRING_TO_BIGFRAMES[ + typing.cast(dtypes.DtypeString, self.to_type) + ] return self.to_type @@ -516,8 +523,10 @@ class RemoteFunctionOp(UnaryOp): def output_type(self, *input_types): # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method - # TODO(b/340895446): fix type error - return self.func.output_dtype # type: ignore + if hasattr(self.func, "output_dtype"): + return self.func.output_dtype + else: + raise AttributeError("output_dtype not defined") @dataclasses.dataclass(frozen=True) @@ -644,8 +653,10 @@ class BinaryRemoteFunctionOp(BinaryOp): def output_type(self, *input_types): # This property should be set to a valid Dtype by the @remote_function decorator or 
read_gbq_function method - # TODO(b/340895446): fix type error - return self.func.output_dtype # type: ignore + if hasattr(self.func, "output_dtype"): + return self.func.output_dtype + else: + raise AttributeError("output_dtype not defined") add_op = AddOp() diff --git a/bigframes/operations/_matplotlib/__init__.py b/bigframes/operations/_matplotlib/__init__.py index f869c1e01d..6ffe71139d 100644 --- a/bigframes/operations/_matplotlib/__init__.py +++ b/bigframes/operations/_matplotlib/__init__.py @@ -12,10 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import bigframes.operations._matplotlib.core as core import bigframes.operations._matplotlib.hist as hist -PLOT_CLASSES: dict[str, type[core.MPLPlot]] = { +PLOT_TYPES = typing.Union[type[core.SamplingPlot], type[hist.HistPlot]] + +PLOT_CLASSES: dict[str, PLOT_TYPES] = { "hist": hist.HistPlot, "line": core.LinePlot, "area": core.AreaPlot, @@ -24,8 +28,7 @@ def plot(data, kind, **kwargs): - # TODO(b/340896123): fix type error - plot_obj = PLOT_CLASSES[kind](data, **kwargs) # type: ignore + plot_obj = PLOT_CLASSES[kind](data, **kwargs) plot_obj.generate() plot_obj.draw() return plot_obj.result diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 78b3df1f19..ff8dd86cff 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -39,8 +39,10 @@ def draw(self) -> None: @property def result(self): - # TODO(b/340896123): fix type error - return self.axes # type: ignore + if hasattr(self, "axes"): + return self.axes + else: + raise AttributeError("Axes not defined") class SamplingPlot(MPLPlot): diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 603f6678a5..f6f9aec800 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -142,7 +142,7 @@ def cut( x: bigframes.series.Series, bins: int, *, - labels: Optional[bool] = None, + labels: Union[Iterable[str], bool, None] = None, ) -> bigframes.series.Series: return bigframes.core.reshape.cut( x, @@ -699,9 +699,35 @@ def read_gbq_function(function_name: str): read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) +@typing.overload +def to_datetime( + arg: Union[ + vendored_pandas_datetimes.local_iterables, + bigframes.series.Series, + bigframes.dataframe.DataFrame, + ], + *, + utc: bool = False, + format: Optional[str] = None, + unit: Optional[str] = None, +) -> bigframes.series.Series: + ... + + +@typing.overload +def to_datetime( + arg: Union[int, float, str, datetime], + *, + utc: bool = False, + format: Optional[str] = None, + unit: Optional[str] = None, +) -> Union[pandas.Timestamp, datetime]: + ... 
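The two overloads above distinguish scalar inputs, which resolve to a plain pandas.Timestamp or datetime, from iterable, Series, or DataFrame inputs, which resolve to a bigframes Series. A brief sketch of both call forms, assuming a configured session for the Series case:

import bigframes.pandas as bpd

# Scalar input resolves to a local pandas.Timestamp / datetime.
ts = bpd.to_datetime("2024-06-10", format="%Y-%m-%d")

# Iterable or Series input resolves to a bigframes Series.
s = bpd.to_datetime(bpd.Series(["2024-06-10", "2024-06-11"]), format="%Y-%m-%d")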
+ + def to_datetime( arg: Union[ - vendored_pandas_datetimes.local_scalars, + Union[int, float, str, datetime], vendored_pandas_datetimes.local_iterables, bigframes.series.Series, bigframes.dataframe.DataFrame, diff --git a/bigframes/series.py b/bigframes/series.py index 367301f08e..d858060aec 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -783,10 +783,10 @@ def pow(self, other: float | int | Series) -> Series: def rpow(self, other: float | int | Series) -> Series: return self._apply_binary_op(other, ops.pow_op, reverse=True) - def __lt__(self, other: float | int | Series) -> Series: # type: ignore + def __lt__(self, other: float | int | str | Series) -> Series: return self.lt(other) - def __le__(self, other: float | int | Series) -> Series: # type: ignore + def __le__(self, other: float | int | str | Series) -> Series: return self.le(other) def lt(self, other) -> Series: @@ -795,10 +795,10 @@ def lt(self, other) -> Series: def le(self, other) -> Series: return self._apply_binary_op(other, ops.le_op) - def __gt__(self, other: float | int | Series) -> Series: # type: ignore + def __gt__(self, other: float | int | str | Series) -> Series: return self.gt(other) - def __ge__(self, other: float | int | Series) -> Series: # type: ignore + def __ge__(self, other: float | int | str | Series) -> Series: return self.ge(other) def gt(self, other) -> Series: diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 2919b2d77f..4c5ce21153 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -84,7 +84,6 @@ import bigframes.core.compile import bigframes.core.guid import bigframes.core.nodes as nodes -from bigframes.core.ordering import IntegerEncoding import bigframes.core.ordering as order import bigframes.core.schema as schemata import bigframes.core.tree_properties as traversals @@ -295,6 +294,9 @@ def __init__( self._bytes_processed_sum = 0 self._slot_millis_sum = 0 self._execution_count = 0 + # Whether this session treats objects as totally ordered. + # Will expose as feature later, only False for internal testing + self._strictly_ordered = True @property def bqclient(self): @@ -1159,35 +1161,14 @@ def _read_pandas_load_job( ) self._start_generic_job(load_job) - ordering = order.ExpressionOrdering( - ordering_value_columns=tuple([order.ascending_over(ordering_col)]), - total_ordering_columns=frozenset([ordering_col]), - integer_encoding=IntegerEncoding(True, is_sequential=True), - ) - table_expression = self.ibis_client.table( # type: ignore - load_table_destination.table_id, - schema=load_table_destination.dataset_id, - database=load_table_destination.project, - ) - - # b/297590178 Potentially a bug in bqclient.load_table_from_dataframe(), that only when the DF is empty, the index columns disappear in table_expression. - if any( - [new_idx_id not in table_expression.columns for new_idx_id in new_idx_ids] - ): - new_idx_ids, idx_labels = [], [] - - column_values = [ - table_expression[col] - for col in table_expression.columns - if col != ordering_col - ] - array_value = core.ArrayValue.from_ibis( - self, - table_expression, - columns=column_values, - hidden_ordering_columns=[table_expression[ordering_col]], - ordering=ordering, - ) + destination_table = self.bqclient.get_table(load_table_destination) + array_value = core.ArrayValue.from_table( + table=destination_table, + # TODO: Generate this directly from original pandas df. 
+ schema=schemata.ArraySchema.from_bq_table(destination_table), + session=self, + offsets_col=ordering_col, + ).drop_columns([ordering_col]) block = blocks.Block( array_value, @@ -1863,30 +1844,20 @@ def _cache_with_cluster_cols( """Executes the query and uses the resulting table to rewrite future executions.""" # TODO: Use this for all executions? Problem is that caching materializes extra # ordering columns + # TODO: May want to support some partial ordering info even for non-strict ordering mode + keep_order_info = self._strictly_ordered + compiled_value = self._compile_ordered(array_value) ibis_expr = compiled_value._to_ibis_expr( - ordering_mode="unordered", expose_hidden_cols=True + ordering_mode="unordered", expose_hidden_cols=keep_order_info ) tmp_table = self._ibis_to_temp_table( ibis_expr, cluster_cols=cluster_cols, api_name="cached" ) - table_expression = self.ibis_client.table( - tmp_table.table_id, - schema=tmp_table.dataset_id, - database=tmp_table.project, - ) - new_columns = [table_expression[column] for column in compiled_value.column_ids] - new_hidden_columns = [ - table_expression[column] - for column in compiled_value._hidden_ordering_column_names - ] - cached_replacement = core.ArrayValue.from_ibis( - self, - table_expression, - columns=new_columns, - hidden_ordering_columns=new_hidden_columns, - ordering=compiled_value._ordering, + cached_replacement = array_value.as_cached( + cache_table=self.bqclient.get_table(tmp_table), + ordering=compiled_value._ordering if keep_order_info else None, ).node self._cached_executions[array_value.node] = cached_replacement @@ -1894,6 +1865,10 @@ def _cache_with_offsets(self, array_value: core.ArrayValue): """Executes the query and uses the resulting table to rewrite future executions.""" # TODO: Use this for all executions? Problem is that caching materializes extra # ordering columns + if not self._strictly_ordered: + raise ValueError( + "Caching with offsets only supported in strictly ordered mode." 
+ ) compiled_value = self._compile_ordered(array_value) ibis_expr = compiled_value._to_ibis_expr( @@ -1902,18 +1877,8 @@ def _cache_with_offsets(self, array_value: core.ArrayValue): tmp_table = self._ibis_to_temp_table( ibis_expr, cluster_cols=["bigframes_offsets"], api_name="cached" ) - table_expression = self.ibis_client.table( - tmp_table.table_id, - schema=tmp_table.dataset_id, - database=tmp_table.project, - ) - new_columns = [table_expression[column] for column in compiled_value.column_ids] - new_hidden_columns = [table_expression["bigframes_offsets"]] - cached_replacement = core.ArrayValue.from_ibis( - self, - table_expression, - columns=new_columns, - hidden_ordering_columns=new_hidden_columns, + cached_replacement = array_value.as_cached( + cache_table=self.bqclient.get_table(tmp_table), ordering=order.ExpressionOrdering.from_offset_col("bigframes_offsets"), ).node self._cached_executions[array_value.node] = cached_replacement diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index e7680d1d35..85664d8dc8 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -109,19 +109,27 @@ def __init__( # cloud clients initialized for lazy load self._bqclient = None - self._bqconnectionclient = None - self._bqstoragereadclient = None - self._cloudfunctionsclient = None - self._resourcemanagerclient = None + self._bqconnectionclient: Optional[ + google.cloud.bigquery_connection_v1.ConnectionServiceClient + ] = None + self._bqstoragereadclient: Optional[ + google.cloud.bigquery_storage_v1.BigQueryReadClient + ] = None + self._cloudfunctionsclient: Optional[ + google.cloud.functions_v2.FunctionServiceClient + ] = None + self._resourcemanagerclient: Optional[ + google.cloud.resourcemanager_v3.ProjectsClient + ] = None def _create_bigquery_client(self): bq_options = None if self._use_regional_endpoints: - # TODO(b/340896138): fix type error bq_options = google.api_core.client_options.ClientOptions( api_endpoint=( _BIGQUERY_REGIONAL_ENDPOINT - if self._location.lower() in _REP_SUPPORTED_REGIONS # type: ignore + if self._location is not None + and self._location.lower() in _REP_SUPPORTED_REGIONS else _BIGQUERY_LOCATIONAL_ENDPOINT ).format(location=self._location), ) @@ -159,11 +167,12 @@ def bqconnectionclient(self): bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name ) - # TODO(b/340896138): fix type error - self._bqconnectionclient = google.cloud.bigquery_connection_v1.ConnectionServiceClient( # type: ignore - client_info=bqconnection_info, - client_options=bqconnection_options, - credentials=self._credentials, + self._bqconnectionclient = ( + google.cloud.bigquery_connection_v1.ConnectionServiceClient( + client_info=bqconnection_info, + client_options=bqconnection_options, + credentials=self._credentials, + ) ) return self._bqconnectionclient @@ -173,20 +182,19 @@ def bqstoragereadclient(self): if not self._bqstoragereadclient: bqstorage_options = None if self._use_regional_endpoints: - # TODO(b/340896138): fix type error bqstorage_options = google.api_core.client_options.ClientOptions( api_endpoint=( _BIGQUERYSTORAGE_REGIONAL_ENDPOINT - if self._location.lower() in _REP_SUPPORTED_REGIONS # type: ignore + if self._location is not None + and self._location.lower() in _REP_SUPPORTED_REGIONS else _BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT ).format(location=self._location), ) bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name ) - # TODO(b/340896138): fix type 
error self._bqstoragereadclient = ( - google.cloud.bigquery_storage_v1.BigQueryReadClient( # type: ignore + google.cloud.bigquery_storage_v1.BigQueryReadClient( client_info=bqstorage_info, client_options=bqstorage_options, credentials=self._credentials, @@ -201,9 +209,8 @@ def cloudfunctionsclient(self): functions_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name ) - # TODO(b/340896138): fix type error self._cloudfunctionsclient = ( - google.cloud.functions_v2.FunctionServiceClient( # type: ignore + google.cloud.functions_v2.FunctionServiceClient( client_info=functions_info, credentials=self._credentials, ) @@ -217,9 +224,8 @@ def resourcemanagerclient(self): resourcemanager_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name ) - # TODO(b/340896138): fix type error self._resourcemanagerclient = ( - google.cloud.resourcemanager_v3.ProjectsClient( # type: ignore + google.cloud.resourcemanager_v3.ProjectsClient( credentials=self._credentials, client_info=resourcemanager_info ) ) diff --git a/bigframes/version.py b/bigframes/version.py index 74a30e35b7..56a1200857 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.8.0" +__version__ = "1.9.0" diff --git a/mypy.ini b/mypy.ini index 5707f14154..f0a005d2e5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -9,6 +9,9 @@ ignore_missing_imports = True [mypy-cloudpickle.*] ignore_missing_imports = True +[mypy-flask] +ignore_missing_imports = True + [mypy-pydata_google_auth] ignore_missing_imports = True diff --git a/noxfile.py b/noxfile.py index 52583bbf1a..177e0e2ab8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -35,7 +35,15 @@ # https://github.com/str0zzapreti/pytest-retry/issues/32 PYTEST_VERSION = "pytest<8.0.0dev" SPHINX_VERSION = "sphinx==4.5.0" -LINT_PATHS = ["docs", "bigframes", "tests", "third_party", "noxfile.py", "setup.py"] +LINT_PATHS = [ + "docs", + "bigframes", + "tests", + "third_party", + "noxfile.py", + "setup.py", + os.path.join("scripts", "benchmark"), +] DEFAULT_PYTHON_VERSION = "3.10" @@ -76,6 +84,8 @@ SYSTEM_TEST_EXTRAS: List[str] = ["tests"] SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {} +LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME" + CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # Sessions are executed in the order so putting the smaller sessions @@ -748,8 +758,6 @@ def notebook(session: nox.Session): "--nbmake-timeout=900", # 15 minutes ] - logging_name_env_var = "BIGFRAMES_PERFORMANCE_LOG_NAME" - try: # Populate notebook parameters and make a backup so that the notebooks # are runnable. 
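The following noxfile hunks stop mutating session.env inside the loop and instead hand each spawned process its own environment mapping, so parallel notebook runs no longer race on a shared variable. A standalone sketch of that pattern; run here is a stand-in for nox's session.run and the notebook names are placeholders:

import os
import subprocess
from multiprocessing import Process

LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME"

def run(*cmd, env=None):
    # Merge with the parent environment so PATH and friends stay available.
    subprocess.run(list(cmd), env={**os.environ, **(env or {})}, check=True)

notebooks = ["notebooks/a.ipynb", "notebooks/b.ipynb"]  # placeholders
processes = []
for nb in notebooks:
    p = Process(
        target=run,
        args=("echo", nb),
        kwargs={"env": {LOGGING_NAME_ENV_VAR: os.path.basename(nb)}},
    )
    p.start()
    processes.append(p)

for p in processes:
    p.join()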
@@ -763,10 +771,10 @@ def notebook(session: nox.Session): # takes an environment variable for performance logging processes = [] for notebook in notebooks: - session.env[logging_name_env_var] = os.path.basename(notebook) process = Process( target=session.run, args=(*pytest_command, notebook), + kwargs={"env": {LOGGING_NAME_ENV_VAR: os.path.basename(notebook)}}, ) process.start() processes.append(process) @@ -788,11 +796,15 @@ def notebook(session: nox.Session): processes = [] for notebook, regions in notebooks_reg.items(): for region in regions: - session.env[logging_name_env_var] = os.path.basename(notebook) process = Process( target=session.run, args=(*pytest_command, notebook), - kwargs={"env": {"BIGQUERY_LOCATION": region}}, + kwargs={ + "env": { + "BIGQUERY_LOCATION": region, + LOGGING_NAME_ENV_VAR: os.path.basename(notebook), + } + }, ) process.start() processes.append(process) @@ -803,34 +815,69 @@ def notebook(session: nox.Session): # when the environment variable is set as it is above, # notebooks output a .bytesprocessed and .slotmillis report # collect those reports and print a summary - _print_performance_report() + _print_performance_report("notebooks/") + + +@nox.session(python=DEFAULT_PYTHON_VERSION) +def benchmark(session: nox.Session): + session.install("-e", ".[all]") + base_path = os.path.join("scripts", "benchmark") + + benchmark_script_list = list(Path(base_path).rglob("*.py")) + # Run benchmarks in parallel session.run's, since each benchmark + # takes an environment variable for performance logging + processes = [] + for benchmark in benchmark_script_list: + process = Process( + target=session.run, + args=("python", benchmark), + kwargs={"env": {LOGGING_NAME_ENV_VAR: benchmark.as_posix()}}, + ) + process.start() + processes.append(process) + + for process in processes: + process.join() + + # when the environment variable is set as it is above, + # notebooks output a .bytesprocessed and .slotmillis report + # collect those reports and print a summary + _print_performance_report(base_path) -def _print_performance_report(): +def _print_performance_report(path: str): """Add an informational report about http queries, bytes processed, and slot time to the testlog output for purposes of measuring bigquery-related performance changes. + + Looks specifically for output files in subfolders of the + passed path. 
(*/*.bytesprocessed and */*.slotmillis) """ print("---BIGQUERY USAGE REPORT---") results_dict = {} - for bytes_report in Path("notebooks/").glob("*/*.bytesprocessed"): + bytes_reports = sorted(Path(path).rglob("*.bytesprocessed")) + for bytes_report in bytes_reports: with open(bytes_report, "r") as bytes_file: - filename = bytes_report.stem + filename = bytes_report.relative_to(path).with_suffix("") lines = bytes_file.read().splitlines() query_count = len(lines) total_bytes = sum([int(line) for line in lines]) results_dict[filename] = [query_count, total_bytes] - for millis_report in Path("notebooks/").glob("*/*.slotmillis"): + os.remove(bytes_report) + + millis_reports = sorted(Path(path).rglob("*.slotmillis")) + for millis_report in millis_reports: with open(millis_report, "r") as millis_file: - filename = millis_report.stem + filename = millis_report.relative_to(path).with_suffix("") lines = millis_file.read().splitlines() total_slot_millis = sum([int(line) for line in lines]) results_dict[filename] += [total_slot_millis] + os.remove(millis_report) cumulative_queries = 0 cumulative_bytes = 0 cumulative_slot_millis = 0 - for results in results_dict.values(): + for name, results in results_dict.items(): if len(results) != 3: raise IOError( "Mismatch in performance logging output. " @@ -842,7 +889,7 @@ def _print_performance_report(): cumulative_bytes += total_bytes cumulative_slot_millis += total_slot_millis print( - f"{filename} - query count: {query_count}," + f"{name} - query count: {query_count}," f" bytes processed sum: {total_bytes}," f" slot millis sum: {total_slot_millis}" ) diff --git a/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q1.py b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q1.py new file mode 100644 index 0000000000..cc5f77b49b --- /dev/null +++ b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q1.py @@ -0,0 +1,14 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py + +import bigframes.pandas as bpd + +print("Groupby benchmark 1: sum v1 by id1") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0") + +ans = x.groupby("id1", as_index=False, dropna=False).agg({"v1": "sum"}) +print(ans.shape) +chk = [ans["v1"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q2.py b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q2.py new file mode 100644 index 0000000000..734a17242b --- /dev/null +++ b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q2.py @@ -0,0 +1,14 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py + +import bigframes.pandas as bpd + +print("Groupby benchmark 2: sum v1 by id1:id2") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0") + +ans = x.groupby(["id1", "id2"], as_index=False, dropna=False).agg({"v1": "sum"}) +print(ans.shape) +chk = [ans["v1"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q3.py b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q3.py new file mode 100644 index 0000000000..242902de64 --- /dev/null +++ b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q3.py @@ -0,0 +1,14 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py + +import bigframes.pandas as bpd + +print("Groupby benchmark 3: sum v1 mean v3 
by id3") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0") + +ans = x.groupby("id3", as_index=False, dropna=False).agg({"v1": "sum", "v3": "mean"}) +print(ans.shape) +chk = [ans["v1"].sum(), ans["v3"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q4.py b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q4.py new file mode 100644 index 0000000000..e4f769545e --- /dev/null +++ b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q4.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py + +import bigframes.pandas as bpd + +print("Groupby benchmark 4: mean v1:v3 by id4") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0") + +ans = x.groupby("id4", as_index=False, dropna=False).agg( + {"v1": "mean", "v2": "mean", "v3": "mean"} +) +print(ans.shape) +chk = [ans["v1"].sum(), ans["v2"].sum(), ans["v3"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q5.py b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q5.py new file mode 100644 index 0000000000..d34a6c055f --- /dev/null +++ b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q5.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py + +import bigframes.pandas as bpd + +print("Groupby benchmark 5: sum v1:v3 by id6") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0") + +ans = x.groupby("id6", as_index=False, dropna=False).agg( + {"v1": "sum", "v2": "sum", "v3": "sum"} +) +print(ans.shape) +chk = [ans["v1"].sum(), ans["v2"].sum(), ans["v3"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q6.py b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q6.py new file mode 100644 index 0000000000..0f3240a129 --- /dev/null +++ b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q6.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py + +import bigframes.pandas as bpd + +print("Groupby benchmark 6: median v3 sd v3 by id4 id5") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0") + +ans = x.groupby(["id4", "id5"], as_index=False, dropna=False).agg( + {"v3": ["median", "std"]} +) +print(ans.shape) +chk = [ans["v3"]["median"].sum(), ans["v3"]["std"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q7.py b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q7.py new file mode 100644 index 0000000000..78e1e94b85 --- /dev/null +++ b/scripts/benchmark/db-benchmark/groupby/G1_1e9_1e2_5_0/q7.py @@ -0,0 +1,18 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py + +import bigframes.pandas as bpd + +print("Groupby benchmark 7: max v1 - min v2 by id3") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0") + +ans = ( + x.groupby("id3", as_index=False, dropna=False) + .agg({"v1": "max", "v2": "min"}) + .assign(range_v1_v2=lambda x: x["v1"] - x["v2"])[["id3", "range_v1_v2"]] +) +print(ans.shape) +chk = [ans["range_v1_v2"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q1.py 
b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q1.py new file mode 100644 index 0000000000..429dc72ad0 --- /dev/null +++ b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q1.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py + +import bigframes.pandas as bpd + +print("Join benchmark 1: small inner on int") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0") +small = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e3_0_0") + +ans = x.merge(small, on="id1") +print(ans.shape) + +chk = [ans["v1"].sum(), ans["v2"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q2.py b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q2.py new file mode 100644 index 0000000000..210c29f844 --- /dev/null +++ b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q2.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py + +import bigframes.pandas as bpd + +print("Join benchmark 2: medium inner on int") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0") +medium = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e6_0_0") + +ans = x.merge(medium, on="id2") +print(ans.shape) + +chk = [ans["v1"].sum(), ans["v2"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q3.py b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q3.py new file mode 100644 index 0000000000..d88d943604 --- /dev/null +++ b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q3.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py + +import bigframes.pandas as bpd + +print("Join benchmark 3: medium outer on int") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0") +medium = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e6_0_0") + +ans = x.merge(medium, how="left", on="id2") +print(ans.shape) + +chk = [ans["v1"].sum(), ans["v2"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q4.py b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q4.py new file mode 100644 index 0000000000..9167043d9a --- /dev/null +++ b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q4.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py + +import bigframes.pandas as bpd + +print("Join benchmark 4: medium inner on factor") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0") +medium = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e6_0_0") + +ans = x.merge(medium, on="id5") +print(ans.shape) + +chk = [ans["v1"].sum(), ans["v2"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q5.py b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q5.py new file mode 100644 index 0000000000..39eb23ac45 --- /dev/null +++ b/scripts/benchmark/db-benchmark/join/J1_1e9_NA_0_0/q5.py @@ -0,0 +1,16 @@ +# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py + +import bigframes.pandas as bpd + +print("Join benchmark 5: big inner on int") + +x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0") +big = 
bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e9_0_0") + +ans = x.merge(big, on="id3") +print(ans.shape) + +chk = [ans["v1"].sum(), ans["v2"].sum()] +print(chk) + +bpd.reset_session() diff --git a/scripts/benchmark/db-benchmark/sort b/scripts/benchmark/db-benchmark/sort new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/benchmark/simple_benchmark.py b/scripts/benchmark/simple_benchmark.py new file mode 100644 index 0000000000..53b35c52ad --- /dev/null +++ b/scripts/benchmark/simple_benchmark.py @@ -0,0 +1,27 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes.pandas as bpd + +# This is a placeholder benchmark. +# TODO(340278185): Add more data analysis tasks and benchmark files +# like this one. + +print("Performing simple benchmark.") +df = bpd.DataFrame() +df["column_1"] = bpd.Series([i for i in range(100000)]) +df["column_2"] = bpd.Series([i * 2 for i in range(100000)]) +df["column_3"] = df["column_1"] + df["column_2"] +df.__repr__() +bpd.reset_session() diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 2501693084..06ad73a702 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -139,6 +139,17 @@ def session() -> Generator[bigframes.Session, None, None]: session.close() # close generated session at cleanup time +@pytest.fixture(scope="session") +def unordered_session() -> Generator[bigframes.Session, None, None]: + context = bigframes.BigQueryOptions( + location="US", + ) + session = bigframes.Session(context=context) + session._strictly_ordered = False + yield session + session.close() # close generated session at cleanup type + + @pytest.fixture(scope="session") def session_tokyo(tokyo_location: str) -> Generator[bigframes.Session, None, None]: context = bigframes.BigQueryOptions( @@ -946,6 +957,18 @@ def penguins_randomforest_classifier_model_name( return model_name +@pytest.fixture(scope="session") +def llm_fine_tune_df_default_index( + session: bigframes.Session, +) -> bigframes.dataframe.DataFrame: + training_table_name = "llm_tuning.emotion_classification_train" + df = session.read_gbq(training_table_name).dropna().head(30) + prefix = "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. 
Text: " + df["prompt"] = prefix + df["text"] + df["label"] = df["label"].astype("string") + return df + + @pytest.fixture(scope="session") def usa_names_grouped_table( session: bigframes.Session, dataset_id_permanent diff --git a/tests/system/large/ml/test_forecasting.py b/tests/system/large/ml/test_forecasting.py index 79deb615b1..438177b1a0 100644 --- a/tests/system/large/ml/test_forecasting.py +++ b/tests/system/large/ml/test_forecasting.py @@ -127,8 +127,7 @@ def test_arima_plus_model_fit_params(time_series_df_default_index, dataset_id): assert reloaded_model.horizon == 100 assert reloaded_model.auto_arima is True assert reloaded_model.auto_arima_max_order == 4 - # TODO(garrettwu): now BQML doesn't populate auto_arima_min_order - # assert reloaded_model.auto_arima_min_order == 1 + assert reloaded_model.auto_arima_min_order == 1 assert reloaded_model.data_frequency == "DAILY" assert reloaded_model.holiday_region == "US" assert reloaded_model.clean_spikes_and_dips is False diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py index eaf666fd50..0cc9fc5353 100644 --- a/tests/system/large/ml/test_linear_model.py +++ b/tests/system/large/ml/test_linear_model.py @@ -192,8 +192,6 @@ def test_logistic_regression_customized_params_fit_score( f"{dataset_id}.temp_configured_logistic_reg_model" in reloaded_model._bqml_model.model_name ) - # TODO(garrettwu) optimize_strategy isn't logged in BQML - # assert reloaded_model.optimize_strategy == "BATCH_GRADIENT_DESCENT" assert reloaded_model.fit_intercept is False assert reloaded_model.class_weight == "balanced" assert reloaded_model.calculate_p_values is False diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index cce49ea187..6bfc9f0da3 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -92,17 +92,14 @@ def make_uniq_udf(udf): target_code = source_code.replace(source_key, target_key, 1) f.write(target_code) spec = importlib.util.spec_from_file_location(udf_file_name, udf_file_path) - # TODO(b/340875260): fix type error - udf_uniq = getattr(spec.loader.load_module(), udf_uniq_name) # type: ignore - - # This is a bit of a hack but we need to remove the reference to a foreign - # module, otherwise the serialization would keep the foreign module - # reference and deserialization would fail with error like following: - # ModuleNotFoundError: No module named 'add_one_2nxcmd9j' - # TODO(shobs): Figure out if there is a better way of generating the unique - # function object, but for now let's just set it to same module as the - # original udf. - udf_uniq.__module__ = udf.__module__ + + assert (spec is not None) and (spec.loader is not None) + module = importlib.util.module_from_spec(spec) + + # exec_module fills the module object with all the functions, classes, and + # variables defined in the module file. 
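This test-helper change swaps the deprecated spec.loader.load_module() for building the module from its spec and executing it explicitly. A generic sketch of that importlib pattern, with placeholder module path and attribute names:

import importlib.util

def load_callable(module_name: str, file_path: str, attr: str):
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    # Executes the module body, populating its functions, classes and variables.
    spec.loader.exec_module(module)
    return getattr(module, attr)

# e.g. add_one = load_callable("my_udf_mod", "/tmp/my_udf_mod.py", "add_one")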
+ spec.loader.exec_module(module) + udf_uniq = getattr(module, udf_uniq_name) return udf_uniq, tmpdir diff --git a/tests/system/load/test_large_tables.py b/tests/system/load/test_large_tables.py index cf1c787a58..f92207b191 100644 --- a/tests/system/load/test_large_tables.py +++ b/tests/system/load/test_large_tables.py @@ -75,22 +75,17 @@ def test_index_repr_large_table(): def test_to_pandas_batches_large_table(): - df = bpd.read_gbq("load_testing.scalars_10gb") - # df will be downloaded locally - expected_row_count, expected_column_count = df.shape - - row_count = 0 - # TODO(b/340890167): fix type error - for df in df.to_pandas_batches(): # type: ignore - batch_row_count, batch_column_count = df.shape + df = bpd.read_gbq("load_testing.scalars_1tb") + _, expected_column_count = df.shape + + # download only a few batches, since 1tb would be too much + iterable = df.to_pandas_batches(page_size=500, max_results=1500) + # use page size since client library doesn't support + # streaming only part of the dataframe via bqstorage + for pdf in iterable: + batch_row_count, batch_column_count = pdf.shape assert batch_column_count == expected_column_count - row_count += batch_row_count - - # Attempt to save on memory by manually removing the batch df - # from local memory after finishing with processing. - del df - - assert row_count == expected_row_count + assert batch_row_count > 0 @pytest.mark.skip(reason="See if it caused kokoro build aborted.") diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py index fd13662275..fd047b3ba6 100644 --- a/tests/system/load/test_llm.py +++ b/tests/system/load/test_llm.py @@ -18,18 +18,6 @@ import bigframes.ml.llm -@pytest.fixture(scope="session") -def llm_fine_tune_df_default_index( - session: bigframes.Session, -) -> bigframes.dataframe.DataFrame: - training_table_name = "llm_tuning.emotion_classification_train" - df = session.read_gbq(training_table_name) - prefix = "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. 
Text: " - df["prompt"] = prefix + df["text"] - df["label"] = df["label"].astype("string") - return df - - @pytest.fixture(scope="session") def llm_remote_text_pandas_df(): """Additional data matching the penguins dataset, with a new index""" @@ -55,9 +43,8 @@ def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, llm_remote_text_ model_name="text-bison", max_iterations=1 ) - df = llm_fine_tune_df_default_index.dropna().sample(n=100) - X_train = df[["prompt"]] - y_train = df[["label"]] + X_train = llm_fine_tune_df_default_index[["prompt"]] + y_train = llm_fine_tune_df_default_index[["label"]] model.fit(X_train, y_train) assert model is not None @@ -112,3 +99,30 @@ def test_llm_palm_score_params(llm_fine_tune_df_default_index): "evaluation_status", ] assert all(col in score_result_col for col in expected_col) + + +@pytest.mark.flaky(retries=2) +def test_llm_gemini_configure_fit(llm_fine_tune_df_default_index, llm_remote_text_df): + model = bigframes.ml.llm.GeminiTextGenerator( + model_name="gemini-pro", max_iterations=1 + ) + + X_train = llm_fine_tune_df_default_index[["prompt"]] + y_train = llm_fine_tune_df_default_index[["label"]] + model.fit(X_train, y_train) + + assert model is not None + + df = model.predict( + llm_remote_text_df["prompt"], + temperature=0.5, + max_output_tokens=100, + top_k=20, + top_p=0.5, + ).to_pandas() + assert df.shape == (3, 4) + assert "ml_generate_text_llm_result" in df.columns + series = df["ml_generate_text_llm_result"] + assert all(series.str.len() == 1) + + # TODO(ashleyxu b/335492787): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept diff --git a/tests/system/small/bigquery/test_vector_search.py b/tests/system/small/bigquery/test_vector_search.py new file mode 100644 index 0000000000..4280c0a888 --- /dev/null +++ b/tests/system/small/bigquery/test_vector_search.py @@ -0,0 +1,136 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import pandas as pd + +import bigframes.bigquery as bbq +import bigframes.pandas as bpd + + +def test_vector_search_basic_params_with_df(): + search_query = bpd.DataFrame( + { + "query_id": ["dog", "cat"], + "embedding": [[1.0, 2.0], [3.0, 5.2]], + } + ) + vector_search_result = bbq.vector_search( + base_table="bigframes-dev.bigframes_tests_sys.base_table", + column_to_search="my_embedding", + query=search_query, + top_k=2, + ).to_pandas() # type:ignore + expected = pd.DataFrame( + { + "query_id": ["cat", "dog", "dog", "cat"], + "embedding": [ + np.array([3.0, 5.2]), + np.array([1.0, 2.0]), + np.array([1.0, 2.0]), + np.array([3.0, 5.2]), + ], + "id": [5, 1, 4, 2], + "my_embedding": [ + np.array([5.0, 5.4]), + np.array([1.0, 2.0]), + np.array([1.0, 3.2]), + np.array([2.0, 4.0]), + ], + "distance": [2.009975, 0.0, 1.2, 1.56205], + }, + index=pd.Index([1, 0, 0, 1], dtype="Int64"), + ) + pd.testing.assert_frame_equal( + vector_search_result, expected, check_dtype=False, rtol=0.1 + ) + + +def test_vector_search_different_params_with_query(): + search_query = bpd.Series([[1.0, 2.0], [3.0, 5.2]]) + vector_search_result = bbq.vector_search( + base_table="bigframes-dev.bigframes_tests_sys.base_table", + column_to_search="my_embedding", + query=search_query, + distance_type="cosine", + top_k=2, + ).to_pandas() # type:ignore + expected = pd.DataFrame( + { + "0": [ + np.array([1.0, 2.0]), + np.array([1.0, 2.0]), + np.array([3.0, 5.2]), + np.array([3.0, 5.2]), + ], + "id": [2, 1, 1, 2], + "my_embedding": [ + np.array([2.0, 4.0]), + np.array([1.0, 2.0]), + np.array([1.0, 2.0]), + np.array([2.0, 4.0]), + ], + "distance": [0.0, 0.0, 0.001777, 0.001777], + }, + index=pd.Index([0, 0, 1, 1], dtype="Int64"), + ) + pd.testing.assert_frame_equal( + vector_search_result, expected, check_dtype=False, rtol=0.1 + ) + + +def test_vector_search_df_with_query_column_to_search(): + search_query = bpd.DataFrame( + { + "query_id": ["dog", "cat"], + "embedding": [[1.0, 2.0], [3.0, 5.2]], + "another_embedding": [[1.0, 2.5], [3.3, 5.2]], + } + ) + vector_search_result = bbq.vector_search( + base_table="bigframes-dev.bigframes_tests_sys.base_table", + column_to_search="my_embedding", + query=search_query, + query_column_to_search="another_embedding", + top_k=2, + ).to_pandas() # type:ignore + expected = pd.DataFrame( + { + "query_id": ["dog", "dog", "cat", "cat"], + "embedding": [ + np.array([1.0, 2.0]), + np.array([1.0, 2.0]), + np.array([3.0, 5.2]), + np.array([3.0, 5.2]), + ], + "another_embedding": [ + np.array([1.0, 2.5]), + np.array([1.0, 2.5]), + np.array([3.3, 5.2]), + np.array([3.3, 5.2]), + ], + "id": [1, 4, 2, 5], + "my_embedding": [ + np.array([1.0, 2.0]), + np.array([1.0, 3.2]), + np.array([2.0, 4.0]), + np.array([5.0, 5.4]), + ], + "distance": [0.5, 0.7, 1.769181, 1.711724], + }, + index=pd.Index([0, 0, 1, 1], dtype="Int64"), + ) + pd.testing.assert_frame_equal( + vector_search_result, expected, check_dtype=False, rtol=0.1 + ) diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index 20e8dd0c19..36d01e126f 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -15,6 +15,7 @@ import pytest from bigframes.ml import llm +from tests.system import utils def test_create_text_generator_model( @@ -366,3 +367,48 @@ def test_gemini_text_generator_predict_with_params_success( assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) + + 
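The vector_search tests above exercise the new bigframes.bigquery.vector_search helper end to end. For reference, a minimal call-shape sketch using only parameters those tests demonstrate; the table and column names are placeholders and a configured session is assumed:

import bigframes.bigquery as bbq
import bigframes.pandas as bpd

query = bpd.DataFrame({"query_id": ["q1"], "embedding": [[1.0, 2.0]]})
result = bbq.vector_search(
    base_table="my-project.my_dataset.embeddings",  # placeholder table
    column_to_search="my_embedding",
    query=query,
    distance_type="cosine",
    top_k=5,
).to_pandas()
print(result)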
+@pytest.mark.flaky(retries=2) +def test_llm_gemini_pro_score(llm_fine_tune_df_default_index): + model = llm.GeminiTextGenerator(model_name="gemini-pro") + + # Check score to ensure the model was fitted + score_result = model.score( + X=llm_fine_tune_df_default_index[["prompt"]], + y=llm_fine_tune_df_default_index[["label"]], + ).to_pandas() + utils.check_pandas_df_schema_and_index( + score_result, + columns=[ + "bleu4_score", + "rouge-l_precision", + "rouge-l_recall", + "rouge-l_f1_score", + "evaluation_status", + ], + index=1, + ) + + +@pytest.mark.flaky(retries=2) +def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index): + model = llm.GeminiTextGenerator(model_name="gemini-pro") + + # Check score to ensure the model was fitted + score_result = model.score( + X=llm_fine_tune_df_default_index["prompt"], + y=llm_fine_tune_df_default_index["label"], + task_type="classification", + ).to_pandas() + utils.check_pandas_df_schema_and_index( + score_result, + columns=[ + "precision", + "recall", + "f1_score", + "label", + "evaluation_status", + ], + index=6, + ) diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py index 9aff2fe773..81e1b2f77f 100644 --- a/tests/system/small/ml/test_metrics.py +++ b/tests/system/small/ml/test_metrics.py @@ -219,8 +219,8 @@ def test_roc_curve_binary_classification_prediction_matches_sklearn(session): ) # sklearn returns float64 np arrays - np_fpr = fpr.to_pandas().astype("float64").array - np_tpr = tpr.to_pandas().astype("float64").array + np_fpr = fpr.to_pandas().astype("float64").array.to_numpy() + np_tpr = tpr.to_pandas().astype("float64").array.to_numpy() np_thresholds = thresholds.to_pandas().astype("float64").array np.testing.assert_array_equal( @@ -228,14 +228,12 @@ def test_roc_curve_binary_classification_prediction_matches_sklearn(session): np_thresholds[1:], expected_thresholds[1:], ) - # TODO(b/340872435): fix type error np.testing.assert_array_equal( - np_fpr, # type: ignore + np_fpr, expected_fpr, ) - # TODO(b/340872435): fix type error np.testing.assert_array_equal( - np_tpr, # type: ignore + np_tpr, expected_tpr, ) @@ -316,8 +314,8 @@ def test_roc_curve_binary_classification_decision_matches_sklearn(session): ) # sklearn returns float64 np arrays - np_fpr = fpr.to_pandas().astype("float64").array - np_tpr = tpr.to_pandas().astype("float64").array + np_fpr = fpr.to_pandas().astype("float64").array.to_numpy() + np_tpr = tpr.to_pandas().astype("float64").array.to_numpy() np_thresholds = thresholds.to_pandas().astype("float64").array np.testing.assert_array_equal( @@ -325,14 +323,12 @@ def test_roc_curve_binary_classification_decision_matches_sklearn(session): np_thresholds[1:], expected_thresholds[1:], ) - # TODO(b/340872435): fix type error np.testing.assert_array_equal( - np_fpr, # type: ignore + np_fpr, expected_fpr, ) - # TODO(b/340872435): fix type error np.testing.assert_array_equal( - np_tpr, # type: ignore + np_tpr, expected_tpr, ) @@ -519,14 +515,10 @@ def test_confusion_matrix_column_index(session): ).astype("Int64") df = session.read_pandas(pd_df) confusion_matrix = metrics.confusion_matrix(df[["y_true"]], df[["y_pred"]]) - # TODO(b/340872435): fix type error - expected_pd_df = ( - pd.DataFrame( # type: ignore - {1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]} - ) - .astype("int64") - .set_index([pd.Index([1, 2, 3, 4])]) - ) + expected_pd_df = pd.DataFrame( + {1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]}, + index=[1, 2, 3, 4], + ).astype("int64") 
pd.testing.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -562,9 +554,8 @@ def test_confusion_matrix_str_matches_sklearn(session): expected_confusion_matrix = sklearn_metrics.confusion_matrix( pd_df[["y_true"]], pd_df[["y_pred"]] ) - # TODO(b/340872435): fix type error - expected_pd_df = pd.DataFrame(expected_confusion_matrix).set_index( # type: ignore - [pd.Index(["ant", "bird", "cat"])] + expected_pd_df = pd.DataFrame( + expected_confusion_matrix, index=["ant", "bird", "cat"] ) expected_pd_df.columns = pd.Index(["ant", "bird", "cat"]) pd.testing.assert_frame_equal( @@ -601,9 +592,8 @@ def test_recall_score(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error recall = metrics.recall_score( - df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None # type: ignore + df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None ) expected_values = [1.000000, 0.000000, 0.666667] expected_index = [0, 1, 2] @@ -620,8 +610,7 @@ def test_recall_score_matches_sklearn(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error - recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore + recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None) expected_values = sklearn_metrics.recall_score( pd_df[["y_true"]], pd_df[["y_pred"]], average=None ) @@ -638,8 +627,7 @@ def test_recall_score_str_matches_sklearn(session): } ).astype("str") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error - recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore + recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None) expected_values = sklearn_metrics.recall_score( pd_df[["y_true"]], pd_df[["y_pred"]], average=None ) @@ -656,8 +644,7 @@ def test_recall_score_series(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error - recall = metrics.recall_score(df["y_true"], df["y_pred"], average=None) # type: ignore + recall = metrics.recall_score(df["y_true"], df["y_pred"], average=None) expected_values = [1.000000, 0.000000, 0.666667] expected_index = [0, 1, 2] expected_recall = pd.Series(expected_values, index=expected_index) @@ -673,9 +660,8 @@ def test_precision_score(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error precision_score = metrics.precision_score( - df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None # type: ignore + df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None ) expected_values = [0.666667, 0.000000, 0.666667] expected_index = [0, 1, 2] @@ -696,7 +682,7 @@ def test_precision_score_matches_sklearn(session): df = session.read_pandas(pd_df) # TODO(b/340872435): fix type error precision_score = metrics.precision_score( - df[["y_true"]], df[["y_pred"]], average=None # type: ignore + df[["y_true"]], df[["y_pred"]], average=None ) expected_values = sklearn_metrics.precision_score( pd_df[["y_true"]], pd_df[["y_pred"]], average=None @@ -716,9 +702,8 @@ def test_precision_score_str_matches_sklearn(session): } ).astype("str") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error precision_score = metrics.precision_score( - df[["y_true"]], df[["y_pred"]], average=None # type: ignore + df[["y_true"]], df[["y_pred"]], average=None ) expected_values = sklearn_metrics.precision_score( 
pd_df[["y_true"]], pd_df[["y_pred"]], average=None @@ -738,8 +723,7 @@ def test_precision_score_series(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error - precision_score = metrics.precision_score(df["y_true"], df["y_pred"], average=None) # type: ignore + precision_score = metrics.precision_score(df["y_true"], df["y_pred"], average=None) expected_values = [0.666667, 0.000000, 0.666667] expected_index = [0, 1, 2] expected_precision = pd.Series(expected_values, index=expected_index) @@ -757,9 +741,8 @@ def test_f1_score(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error f1_score = metrics.f1_score( - df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None # type: ignore + df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None ) expected_values = [0.8, 0.000000, 0.666667] expected_index = [0, 1, 2] @@ -776,8 +759,7 @@ def test_f1_score_matches_sklearn(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error - f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore + f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None) expected_values = sklearn_metrics.f1_score( pd_df[["y_true"]], pd_df[["y_pred"]], average=None ) @@ -794,8 +776,7 @@ def test_f1_score_str_matches_sklearn(session): } ).astype("str") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error - f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore + f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None) expected_values = sklearn_metrics.f1_score( pd_df[["y_true"]], pd_df[["y_pred"]], average=None ) @@ -812,8 +793,7 @@ def test_f1_score_series(session): } ).astype("Int64") df = session.read_pandas(pd_df) - # TODO(b/340872435): fix type error - f1_score = metrics.f1_score(df["y_true"], df["y_pred"], average=None) # type: ignore + f1_score = metrics.f1_score(df["y_true"], df["y_pred"], average=None) expected_values = [0.8, 0.000000, 0.666667] expected_index = [0, 1, 2] expected_f1 = pd.Series(expected_values, index=expected_index) diff --git a/tests/system/small/ml/test_model_selection.py b/tests/system/small/ml/test_model_selection.py index ca14186a4d..63d0840d29 100644 --- a/tests/system/small/ml/test_model_selection.py +++ b/tests/system/small/ml/test_model_selection.py @@ -130,17 +130,12 @@ def test_train_test_split_seeded_correct_rows( X, y, random_state=42 ) - # TODO(b/340876926): fix type error - X_train = X_train.to_pandas().sort_index() # type: ignore - # TODO(b/340876926): fix type error - X_test = X_test.to_pandas().sort_index() # type: ignore - # TODO(b/340876926): fix type error - y_train = y_train.to_pandas().sort_index() # type: ignore - # TODO(b/340876926): fix type error - y_test = y_test.to_pandas().sort_index() # type: ignore - - # TODO(b/340876926): fix type error - train_index = pd.Index( # type: ignore + X_train_sorted = X_train.to_pandas().sort_index() + X_test_sorted = X_test.to_pandas().sort_index() + y_train_sorted = y_train.to_pandas().sort_index() + y_test_sorted = y_test.to_pandas().sort_index() + + train_index: pd.Index = pd.Index( [ 144, 146, @@ -167,15 +162,20 @@ def test_train_test_split_seeded_correct_rows( dtype="Int64", name="rowindex", ) - # TODO(b/340876926): fix type error - test_index = pd.Index( # type: ignore + test_index: pd.Index = pd.Index( [148, 161, 226, 269, 278, 289, 291], dtype="Int64", 
name="rowindex" ) all_data.index.name = "_" - # TODO(b/340876926): fix type error + + assert ( + isinstance(X_train_sorted, pd.DataFrame) + and isinstance(X_test_sorted, pd.DataFrame) + and isinstance(y_train_sorted, pd.DataFrame) + and isinstance(y_test_sorted, pd.DataFrame) + ) pd.testing.assert_frame_equal( - X_train, # type: ignore + X_train_sorted, all_data[ [ "species", @@ -184,9 +184,8 @@ def test_train_test_split_seeded_correct_rows( ] ].loc[train_index], ) - # TODO(b/340876926): fix type error pd.testing.assert_frame_equal( - X_test, # type: ignore + X_test_sorted, all_data[ [ "species", @@ -195,18 +194,16 @@ def test_train_test_split_seeded_correct_rows( ] ].loc[test_index], ) - # TODO(b/340876926): fix type error pd.testing.assert_frame_equal( - y_train, # type: ignore + y_train_sorted, all_data[ [ "body_mass_g", ] ].loc[train_index], ) - # TODO(b/340876926): fix type error pd.testing.assert_frame_equal( - y_test, # type: ignore + y_test_sorted, all_data[ [ "body_mass_g", diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 838bc11108..c5c649c638 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -310,9 +310,8 @@ def test_dt_floor(scalars_dfs, col_name, freq): def test_dt_compare_coerce_str_datetime(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df["datetime_col"] - # TODO(b/340878286): fix type error - bf_result = (bf_series >= "2024-01-01").to_pandas() # type: ignore + bf_result = (bf_series >= "2024-01-01").to_pandas() pd_result = scalars_pandas_df["datetime_col"] >= pd.to_datetime("2024-01-01") # pandas produces pyarrow bool dtype diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index eae25bb027..d5854bd8d0 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -17,7 +17,7 @@ import sys import tempfile import typing -from typing import Tuple +from typing import Dict, List, Tuple import geopandas as gpd # type: ignore import numpy as np @@ -146,9 +146,9 @@ def test_df_construct_inline_respects_location(): with bpd.option_context("bigquery.location", "europe-west1"): df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]]) repr(df) + assert df.query_job is not None + table = bpd.get_global_session().bqclient.get_table(df.query_job.destination) - # TODO(b/340876936): fix type error - table = bpd.get_global_session().bqclient.get_table(df.query_job.destination) # type: ignore assert table.location == "europe-west1" @@ -753,10 +753,9 @@ def test_assign_listlike_to_empty_df(session): def test_assign_to_empty_df_multiindex_error(session): empty_df = dataframe.DataFrame(session=session) empty_pandas_df = pd.DataFrame() - # TODO(b/340876936): fix type error - empty_df["empty_col_1"] = [] # type: ignore - # TODO(b/340876936): fix type error - empty_df["empty_col_2"] = [] # type: ignore + + empty_df["empty_col_1"] = typing.cast(series.Series, []) + empty_df["empty_col_2"] = typing.cast(series.Series, []) empty_pandas_df["empty_col_1"] = [] empty_pandas_df["empty_col_2"] = [] empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"]) @@ -1340,40 +1339,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how): def test_get_dtypes(scalars_df_default_index): dtypes = scalars_df_default_index.dtypes + dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = { + "bool_col": pd.BooleanDtype(), + "bytes_col": pd.ArrowDtype(pa.binary()), + 
"date_col": pd.ArrowDtype(pa.date32()), + "datetime_col": pd.ArrowDtype(pa.timestamp("us")), + "geography_col": gpd.array.GeometryDtype(), + "int64_col": pd.Int64Dtype(), + "int64_too": pd.Int64Dtype(), + "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)), + "float64_col": pd.Float64Dtype(), + "rowindex": pd.Int64Dtype(), + "rowindex_2": pd.Int64Dtype(), + "string_col": pd.StringDtype(storage="pyarrow"), + "time_col": pd.ArrowDtype(pa.time64("us")), + "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")), + } pd.testing.assert_series_equal( dtypes, - pd.Series( - { - # TODO(b/340876936): fix type error - "bool_col": pd.BooleanDtype(), # type: ignore - # TODO(b/340876936): fix type error - "bytes_col": pd.ArrowDtype(pa.binary()), # type: ignore - # TODO(b/340876936): fix type error - "date_col": pd.ArrowDtype(pa.date32()), # type: ignore - # TODO(b/340876936): fix type error - "datetime_col": pd.ArrowDtype(pa.timestamp("us")), # type: ignore - # TODO(b/340876936): fix type error - "geography_col": gpd.array.GeometryDtype(), # type: ignore - # TODO(b/340876936): fix type error - "int64_col": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "int64_too": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)), # type: ignore - # TODO(b/340876936): fix type error - "float64_col": pd.Float64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "rowindex": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "rowindex_2": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "string_col": pd.StringDtype(storage="pyarrow"), # type: ignore - # TODO(b/340876936): fix type error - "time_col": pd.ArrowDtype(pa.time64("us")), # type: ignore - # TODO(b/340876936): fix type error - "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")), # type: ignore - } - ), + pd.Series(dtypes_dict), ) @@ -1828,10 +1812,9 @@ def test_df_update(overwrite, filter_func): if pd.__version__.startswith("1."): pytest.skip("dtype handled differently in pandas 1.x.") - # TODO(b/340876936): fix type error - index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore - # TODO(b/340876936): fix type error - index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore + index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") + + index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") pd_df1 = pandas.DataFrame( {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 ) @@ -1891,10 +1874,10 @@ def test_df_idxmax(): ], ) def test_df_align(join, axis): - # TODO(b/340876936): fix type error - index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore - # TODO(b/340876936): fix type error - index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore + + index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") + + index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") pd_df1 = pandas.DataFrame( {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 ) @@ -1911,10 +1894,11 @@ def test_df_align(join, axis): pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis) # Don't check dtype as pandas does unnecessary float conversion - # TODO(b/340876936): fix type error - pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) # type: ignore - # TODO(b/340876936): fix type error - pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) # 
type: ignore + assert isinstance(bf_result1, dataframe.DataFrame) and isinstance( + bf_result2, dataframe.DataFrame + ) + pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) + pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) def test_combine_first( @@ -2568,11 +2552,15 @@ def test_df_transpose(): # Include some floats to ensure type coercion values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]] # Test complex case of both axes being multi-indices with non-unique elements - # TODO(b/340876936): fix type error - columns = pd.Index(["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore + + columns: pandas.Index = pd.Index( + ["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow") + ) columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"]) - # TODO(b/340876936): fix type error - index = pd.Index(["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore + + index: pandas.Index = pd.Index( + ["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow") + ) rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"]) pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi) @@ -3124,9 +3112,9 @@ def test_dataframe_aggregate_int(scalars_df_index, scalars_pandas_df_index, col, # Check dtype separately assert bf_result.dtype == "Int64" - + # Is otherwise "object" dtype + pd_result.index = pd_result.index.astype("string[pyarrow]") # Pandas may produce narrower numeric types - # Pandas has object index type assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) @@ -3146,6 +3134,7 @@ def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col # Pandas may produce narrower numeric types # Pandas has object index type + pd_result.index = pd_result.index.astype("string[pyarrow]") assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) @@ -3183,6 +3172,7 @@ def test_dataframe_aggregates( # Pandas may produce narrower numeric types, but bigframes always produces Float64 # Pandas has object index type + pd_result.index = pd_result.index.astype("string[pyarrow]") assert_series_equal( pd_result, bf_result, @@ -3740,10 +3730,9 @@ def test_df_setattr_index(): [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"] ) bf_df = dataframe.DataFrame(pd_df) - # TODO(b/340876936): fix type error - pd_df.index = [4, 5] # type: ignore - # TODO(b/340876936): fix type error - bf_df.index = [4, 5] # type: ignore + + pd_df.index = pandas.Index([4, 5]) + bf_df.index = [4, 5] assert_pandas_df_equal( pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False @@ -3755,10 +3744,10 @@ def test_df_setattr_columns(): [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"] ) bf_df = dataframe.DataFrame(pd_df) - # TODO(b/340876936): fix type error - pd_df.columns = [4, 5, 6] # type: ignore - # TODO(b/340876936): fix type error - bf_df.columns = [4, 5, 6] # type: ignore + + pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6])) + + bf_df.columns = pandas.Index([4, 5, 6]) assert_pandas_df_equal( pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False @@ -3852,8 +3841,8 @@ def test_iloc_list_multiindex(scalars_dfs): def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index): - # TODO(b/340876936): fix type error - index_list = [] # type: ignore + + index_list: List[int] = [] bf_result = scalars_df_index.iloc[index_list] pd_result = 
scalars_pandas_df_index.iloc[index_list] diff --git a/tests/system/small/test_encryption.py b/tests/system/small/test_encryption.py index 088211d7fc..428a6a28bf 100644 --- a/tests/system/small/test_encryption.py +++ b/tests/system/small/test_encryption.py @@ -242,14 +242,12 @@ def test_bqml(bq_cmek, session_with_bq_cmek, penguins_table_id): model.fit(X_train, y_train) assert model is not None - # TODO(b/340879287): fix type error - assert model._bqml_model.model.encryption_configuration is not None # type: ignore - # TODO(b/340879287): fix type error - assert model._bqml_model.model.encryption_configuration.kms_key_name == bq_cmek # type: ignore + assert model._bqml_model is not None + assert model._bqml_model.model.encryption_configuration is not None + assert model._bqml_model.model.encryption_configuration.kms_key_name == bq_cmek # Assert that model exists in BQ with intended encryption - # TODO(b/340879287): fix type error - model_bq = session_with_bq_cmek.bqclient.get_model(model._bqml_model.model_name) # type: ignore + model_bq = session_with_bq_cmek.bqclient.get_model(model._bqml_model.model_name) assert model_bq.encryption_configuration.kms_key_name == bq_cmek # Explicitly save the model to a destination and assert that encryption holds @@ -260,12 +258,12 @@ def test_bqml(bq_cmek, session_with_bq_cmek, penguins_table_id): f"{model_ref.project}.{model_ref.dataset_id}.{model_ref.model_id}" ) new_model = model.to_gbq(model_ref_full_name) - # TODO(b/340879287): fix type error - assert new_model._bqml_model.model.encryption_configuration.kms_key_name == bq_cmek # type: ignore + assert new_model._bqml_model is not None + assert new_model._bqml_model.model.encryption_configuration is not None + assert new_model._bqml_model.model.encryption_configuration.kms_key_name == bq_cmek # Assert that model exists in BQ with intended encryption - # TODO(b/340879287): fix type error - model_bq = session_with_bq_cmek.bqclient.get_model(new_model._bqml_model.model_name) # type: ignore + model_bq = session_with_bq_cmek.bqclient.get_model(new_model._bqml_model.model_name) assert model_bq.encryption_configuration.kms_key_name == bq_cmek # Assert that model registration keeps the encryption @@ -279,11 +277,11 @@ def test_bqml(bq_cmek, session_with_bq_cmek, penguins_table_id): # https://cloud.google.com/vertex-ai/docs/general/cmek#create_resources_with_the_kms_key. # bigframes.ml does not provide any API for the model deployment. 
model_registered = new_model.register() - # TODO(b/340879287): fix type error + assert model_registered._bqml_model is not None + assert model_registered._bqml_model.model.encryption_configuration is not None assert ( - model_registered._bqml_model.model.encryption_configuration.kms_key_name # type: ignore + model_registered._bqml_model.model.encryption_configuration.kms_key_name == bq_cmek ) - # TODO(b/340879287): fix type error - model_bq = session_with_bq_cmek.bqclient.get_model(new_model._bqml_model.model_name) # type: ignore + model_bq = session_with_bq_cmek.bqclient.get_model(new_model._bqml_model.model_name) assert model_bq.encryption_configuration.kms_key_name == bq_cmek diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index 58fd346bc1..d68cf6c3f3 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -24,20 +24,17 @@ def test_index_construct_from_list(): bf_result = bpd.Index( [3, 14, 159], dtype=pd.Int64Dtype(), name="my_index" ).to_pandas() - # TODO(b/340878489): fix type error - pd_result = pd.Index([3, 14, 159], dtype=pd.Int64Dtype(), name="my_index") # type: ignore + pd_result: pd.Index = pd.Index([3, 14, 159], dtype=pd.Int64Dtype(), name="my_index") pd.testing.assert_index_equal(bf_result, pd_result) def test_index_construct_from_series(): - # TODO(b/340878489): fix type error bf_result = bpd.Index( bpd.Series([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name"), name="index_name", dtype=pd.Int64Dtype(), - ).to_pandas() # type: ignore - # TODO(b/340878489): fix type error - pd_result = pd.Index( # type: ignore + ).to_pandas() + pd_result: pd.Index = pd.Index( pd.Series([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name"), name="index_name", dtype=pd.Int64Dtype(), @@ -49,14 +46,15 @@ def test_index_construct_from_index(): bf_index_input = bpd.Index( [3, 14, 159], dtype=pd.Float64Dtype(), name="series_name" ) - # TODO(b/340878489): fix type error bf_result = bpd.Index( - bf_index_input, dtype=pd.Int64Dtype(), name="index_name" # type: ignore + bf_index_input, dtype=pd.Int64Dtype(), name="index_name" ).to_pandas() - # TODO(b/340878489): fix type error - pd_index_input = pd.Index([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name") # type: ignore - # TODO(b/340878489): fix type error - pd_result = pd.Index(pd_index_input, dtype=pd.Int64Dtype(), name="index_name") # type: ignore + pd_index_input: pd.Index = pd.Index( + [3, 14, 159], dtype=pd.Float64Dtype(), name="series_name" + ) + pd_result: pd.Index = pd.Index( + pd_index_input, dtype=pd.Int64Dtype(), name="index_name" + ) pd.testing.assert_index_equal(bf_result, pd_result) @@ -365,17 +363,16 @@ def test_index_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep): def test_index_isin(scalars_df_index, scalars_pandas_df_index): + col_name = "int64_col" bf_series = ( - scalars_df_index.set_index("int64_col").index.isin([2, 55555, 4]).to_pandas() + scalars_df_index.set_index(col_name).index.isin([2, 55555, 4]).to_pandas() ) - pd_result_array = scalars_pandas_df_index.set_index("int64_col").index.isin( + pd_result_array = scalars_pandas_df_index.set_index(col_name).index.isin( [2, 55555, 4] ) - # TODO(b/340878489): fix type error - pd.testing.assert_index_equal( # type: ignore - pd.Index(pd_result_array), + pd.testing.assert_index_equal( + pd.Index(pd_result_array).set_names(col_name), bf_series, - check_names=False, ) diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 256046f8b1..30ffaa8a7d 100644 --- 
a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -394,12 +394,8 @@ def test_cut(scalars_dfs): # make sure the result is a supported dtype assert bf_result.dtype == bpd.Int64Dtype() - - # TODO(b/340884971): fix type error - bf_result = bf_result.to_pandas() # type: ignore pd_result = pd_result.astype("Int64") - # TODO(b/340884971): fix type error - pd.testing.assert_series_equal(bf_result, pd_result) # type: ignore + pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) def test_cut_default_labels(scalars_dfs): @@ -529,13 +525,9 @@ def test_qcut(scalars_dfs, q): scalars_pandas_df["float64_col"], q, labels=False, duplicates="drop" ) bf_result = bpd.qcut(scalars_df["float64_col"], q, labels=False, duplicates="drop") - - # TODO(b/340884971): fix type error - bf_result = bf_result.to_pandas() # type: ignore pd_result = pd_result.astype("Int64") - # TODO(b/340884971): fix type error - pd.testing.assert_series_equal(bf_result, pd_result) # type: ignore + pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -572,9 +564,8 @@ def test_to_datetime_scalar(arg, utc, unit, format): ], ) def test_to_datetime_iterable(arg, utc, unit, format): - # TODO(b/340884971): fix type error bf_result = ( - bpd.to_datetime(arg, utc=utc, unit=unit, format=format) # type: ignore + bpd.to_datetime(arg, utc=utc, unit=unit, format=format) .to_pandas() .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") ) @@ -589,9 +580,8 @@ def test_to_datetime_iterable(arg, utc, unit, format): def test_to_datetime_series(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col = "int64_too" - # TODO(b/340884971): fix type error bf_result = ( - bpd.to_datetime(scalars_df[col], unit="s").to_pandas().astype("datetime64[s]") # type: ignore + bpd.to_datetime(scalars_df[col], unit="s").to_pandas().astype("datetime64[s]") ) pd_result = pd.Series(pd.to_datetime(scalars_pandas_df[col], unit="s")) pd.testing.assert_series_equal( @@ -614,8 +604,7 @@ def test_to_datetime_series(scalars_dfs): ], ) def test_to_datetime_unit_param(arg, unit): - # TODO(b/340884971): fix type error - bf_result = bpd.to_datetime(arg, unit=unit).to_pandas().astype("datetime64[ns]") # type: ignore + bf_result = bpd.to_datetime(arg, unit=unit).to_pandas().astype("datetime64[ns]") pd_result = pd.Series(pd.to_datetime(arg, unit=unit)).dt.floor("us") pd.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False @@ -632,9 +621,8 @@ def test_to_datetime_unit_param(arg, unit): ], ) def test_to_datetime_format_param(arg, utc, format): - # TODO(b/340884971): fix type error bf_result = ( - bpd.to_datetime(arg, utc=utc, format=format) # type: ignore + bpd.to_datetime(arg, utc=utc, format=format) .to_pandas() .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") ) @@ -686,9 +674,8 @@ def test_to_datetime_format_param(arg, utc, format): ], ) def test_to_datetime_string_inputs(arg, utc, output_in_utc, format): - # TODO(b/340884971): fix type error bf_result = ( - bpd.to_datetime(arg, utc=utc, format=format) # type: ignore + bpd.to_datetime(arg, utc=utc, format=format) .to_pandas() .astype("datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]") ) @@ -730,9 +717,8 @@ def test_to_datetime_string_inputs(arg, utc, output_in_utc, format): ], ) def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc): - # TODO(b/340884971): fix type error bf_result = ( - bpd.to_datetime(arg, utc=utc) # type: ignore + bpd.to_datetime(arg, utc=utc) .to_pandas() 
.astype("datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]") ) diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index d2ee4411f4..d84d520988 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -17,6 +17,7 @@ import google.api_core.exceptions from google.cloud import bigquery import pandas as pd +import pyarrow import pytest import bigframes @@ -80,7 +81,7 @@ def session_with_bq_connection( bq_cf_connection, dataset_id_permanent ) -> bigframes.Session: session = bigframes.Session( - bigframes.BigQueryOptions(bq_connection=bq_cf_connection) + bigframes.BigQueryOptions(bq_connection=bq_cf_connection, location="US") ) return session @@ -484,17 +485,27 @@ def add_one(x): @pytest.mark.flaky(retries=2, delay=120) -def test_series_map(session_with_bq_connection, scalars_dfs): - def add_one(x): - return x + 1 +def test_series_map_bytes(session_with_bq_connection, scalars_dfs): + """Check that bytes is support as input and output.""" + scalars_df, scalars_pandas_df = scalars_dfs - remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one) + def bytes_to_hex(mybytes: bytes) -> bytes: + import pandas - scalars_df, scalars_pandas_df = scalars_dfs + return mybytes.hex().encode("utf-8") if pandas.notna(mybytes) else None # type: ignore - bf_result = scalars_df.int64_too.map(remote_add_one).to_pandas() - pd_result = scalars_pandas_df.int64_too.map(add_one) - pd_result = pd_result.astype("Int64") # pandas type differences + # TODO(b/345516010): the type: ignore is because "Optional" not yet + # supported as a type annotation in @remote_function(). + assert bytes_to_hex(None) is None # type: ignore + assert bytes_to_hex(b"\x00\xdd\xba\x11") == b"00ddba11" + pd_result = scalars_pandas_df.bytes_col.map(bytes_to_hex).astype( + pd.ArrowDtype(pyarrow.binary()) + ) + + remote_bytes_to_hex = session_with_bq_connection.remote_function( + packages=["pandas"] + )(bytes_to_hex) + bf_result = scalars_df.bytes_col.map(remote_bytes_to_hex).to_pandas() pd.testing.assert_series_equal( bf_result, @@ -537,12 +548,12 @@ def add_one(x): @pytest.mark.flaky(retries=2, delay=120) -def test_read_gbq_function_detects_invalid_function(bigquery_client, dataset_id): +def test_read_gbq_function_detects_invalid_function(session, dataset_id): dataset_ref = bigquery.DatasetReference.from_string(dataset_id) with pytest.raises(ValueError) as e: rf.read_gbq_function( str(dataset_ref.routine("not_a_function")), - bigquery_client=bigquery_client, + session=session, ) assert "Unknown function" in str(e.value) @@ -550,6 +561,7 @@ def test_read_gbq_function_detects_invalid_function(bigquery_client, dataset_id) @pytest.mark.flaky(retries=2, delay=120) def test_read_gbq_function_like_original( + session, bigquery_client, bigqueryconnection_client, cloudfunctions_client, @@ -577,7 +589,7 @@ def square1(x): square2 = rf.read_gbq_function( function_name=square1.bigframes_remote_function, - bigquery_client=bigquery_client, + session=session, ) # The newly-created function (square1) should have a remote function AND a @@ -607,7 +619,14 @@ def square1(x): @pytest.mark.flaky(retries=2, delay=120) -def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id): +def test_read_gbq_function_runs_existing_udf(session, bigquery_client, dataset_id): + func = session.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only") + got = func("AURÉLIE") + assert got == "aurÉlie" + + +@pytest.mark.flaky(retries=2, delay=120) +def 
test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id): dataset_ref = bigquery.DatasetReference.from_string(dataset_id) arg = bigquery.RoutineArgument( name="x", @@ -633,7 +652,8 @@ def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id): # Create the routine in BigQuery and read it back using read_gbq_function. bigquery_client.create_routine(routine, exists_ok=True) square = rf.read_gbq_function( - str(routine.reference), bigquery_client=bigquery_client + str(routine.reference), + session=session, ) # It should point to the named routine and yield the expected results. @@ -649,16 +669,17 @@ def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id): indirect_df = bigframes.dataframe.DataFrame(src) indirect_df = indirect_df.assign(y=indirect_df.x.apply(square)) - # TODO(b/340875260): fix type error - indirect_df = indirect_df.to_pandas() # type: ignore + converted_indirect_df = indirect_df.to_pandas() assert_pandas_df_equal( - direct_df, indirect_df, ignore_order=True, check_index_type=False + direct_df, converted_indirect_df, ignore_order=True, check_index_type=False ) @pytest.mark.flaky(retries=2, delay=120) -def test_read_gbq_function_enforces_explicit_types(bigquery_client, dataset_id): +def test_read_gbq_function_enforces_explicit_types( + session, bigquery_client, dataset_id +): dataset_ref = bigquery.DatasetReference.from_string(dataset_id) typed_arg = bigquery.RoutineArgument( name="x", @@ -702,24 +723,35 @@ def test_read_gbq_function_enforces_explicit_types(bigquery_client, dataset_id): bigquery_client.create_routine(neither_type_specified, exists_ok=True) rf.read_gbq_function( - str(both_types_specified.reference), bigquery_client=bigquery_client + str(both_types_specified.reference), + session=session, ) rf.read_gbq_function( - str(only_return_type_specified.reference), bigquery_client=bigquery_client + str(only_return_type_specified.reference), + session=session, ) with pytest.raises(ValueError): rf.read_gbq_function( - str(only_arg_type_specified.reference), bigquery_client=bigquery_client + str(only_arg_type_specified.reference), + session=session, ) with pytest.raises(ValueError): rf.read_gbq_function( - str(neither_type_specified.reference), bigquery_client=bigquery_client + str(neither_type_specified.reference), + session=session, ) @pytest.mark.flaky(retries=2, delay=120) def test_df_apply_axis_1(session, scalars_dfs): - columns = ["bool_col", "int64_col", "int64_too", "float64_col", "string_col"] + columns = [ + "bool_col", + "int64_col", + "int64_too", + "float64_col", + "string_col", + "bytes_col", + ] scalars_df, scalars_pandas_df = scalars_dfs def add_ints(row): @@ -729,9 +761,10 @@ def add_ints(row): bigframes.exceptions.PreviewWarning, match="input_types=Series is in preview.", ): - add_ints_remote = session.remote_function(bigframes.series.Series, int)( - add_ints - ) + add_ints_remote = session.remote_function( + bigframes.series.Series, + int, + )(add_ints) with pytest.warns( bigframes.exceptions.PreviewWarning, match="axis=1 scenario is in preview." @@ -829,7 +862,6 @@ def add_ints(row): @pytest.mark.parametrize( ("column"), [ - pytest.param("bytes_col"), pytest.param("date_col"), pytest.param("datetime_col"), pytest.param("geography_col"), @@ -854,7 +886,9 @@ def echo(row): with pytest.raises( NotImplementedError, match=re.escape( - f"DataFrame has a column of dtype '{dtype}' which is not supported with axis=1. Supported dtypes are ('Int64', 'Float64', 'boolean', 'string')." 
+ f"DataFrame has a column of dtype '{dtype}' which is not supported with axis=1. Supported dtypes are (" ), + ), pytest.warns( + bigframes.exceptions.PreviewWarning, match="axis=1 scenario is in preview." ): scalars_df[[column]].apply(echo, axis=1) diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py new file mode 100644 index 0000000000..12c0d6e259 --- /dev/null +++ b/tests/system/small/test_unordered.py @@ -0,0 +1,28 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pandas as pd + +import bigframes.pandas as bpd +from tests.system.utils import assert_pandas_df_equal + + +def test_unordered_mode_cache_aggregate(unordered_session): + pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype()) + df = bpd.DataFrame(pd_df, session=unordered_session) + mean_diff = df - df.mean() + mean_diff.cache() + bf_result = mean_diff.to_pandas(ordered=False) + pd_result = pd_df - pd_df.mean() + + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) diff --git a/tests/unit/_config/test_threaded_options.py b/tests/unit/_config/test_threaded_options.py new file mode 100644 index 0000000000..7fc97a9f72 --- /dev/null +++ b/tests/unit/_config/test_threaded_options.py @@ -0,0 +1,41 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import threading + +import bigframes._config + + +def test_mutate_options_threaded(): + options = bigframes._config.Options() + options.display.max_rows = 50 + result_dict = {"this_before": options.display.max_rows} + + def mutate_options_threaded(options, result_dict): + result_dict["other_before"] = options.display.max_rows + + options.display.max_rows = 100 + result_dict["other_after"] = options.display.max_rows + + thread = threading.Thread( + target=(lambda: mutate_options_threaded(options, result_dict)) + ) + thread.start() + thread.join(1) + result_dict["this_after"] = options.display.max_rows + + assert result_dict["this_before"] == 50 + assert result_dict["this_after"] == 50 + assert result_dict["other_before"] == 25 + assert result_dict["other_after"] == 100 diff --git a/tests/unit/core/compiler/__init__.py b/tests/unit/core/compiler/__init__.py new file mode 100644 index 0000000000..6d5e14bcf4 --- /dev/null +++ b/tests/unit/core/compiler/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/core/compiler/test_googlesql.py b/tests/unit/core/compiler/test_googlesql.py new file mode 100644 index 0000000000..70ca5cfa12 --- /dev/null +++ b/tests/unit/core/compiler/test_googlesql.py @@ -0,0 +1,155 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import bigframes.core.compile.googlesql as sql + + +@pytest.mark.parametrize( + ("table_id", "dataset_id", "project_id", "expected"), + [ + pytest.param("a", None, None, "`a`"), + pytest.param("a", "b", None, "`b`.`a`"), + pytest.param("a", "b", "c", "`c`.`b`.`a`"), + pytest.param("a", None, "c", None, marks=pytest.mark.xfail(raises=ValueError)), + ], +) +def test_table_expression(table_id, dataset_id, project_id, expected): + expr = sql.TableExpression( + table_id=table_id, dataset_id=dataset_id, project_id=project_id + ) + assert expr.sql() == expected + + +@pytest.mark.parametrize( + ("table_name", "alias", "expected"), + [ + pytest.param(None, None, None, marks=pytest.mark.xfail(raises=ValueError)), + pytest.param("a", None, "`a`"), + pytest.param("a", "aa", "`a` AS `aa`"), + ], +) +def test_from_item_w_table_name(table_name, alias, expected): + expr = sql.FromItem( + table_name=None + if table_name is None + else sql.TableExpression(table_id=table_name), + as_alias=None + if alias is None + else sql.AsAlias(sql.AliasExpression(alias=alias)), + ) + assert expr.sql() == expected + + +def test_from_item_w_query_expr(): + from_clause = sql.FromClause( + sql.FromItem(table_name=sql.TableExpression(table_id="table_a")) + ) + select = sql.Select( + select_list=[sql.SelectAll(sql.StarExpression())], + from_clause_list=[from_clause], + ) + query_expr = sql.QueryExpr(select=select) + expected = "SELECT\n*\nFROM\n`table_a`" + + # A QueryExpr object + expr = sql.FromItem(query_expr=query_expr) + assert expr.sql() == f"({expected})" + + # A str object + expr = sql.FromItem(query_expr=expected) + assert expr.sql() == f"({expected})" + + +def test_from_item_w_cte(): + expr = sql.FromItem(cte_name=sql.CTEExpression("test")) + assert expr.sql() == "`test`" + + +@pytest.mark.parametrize( + ("col_name", "alias", "expected"), + [ + pytest.param("a", None, "`a`"), + pytest.param("a", "aa", "`a` AS `aa`"), + ], +) +def test_select_expression(col_name, alias, expected): + expr = sql.SelectExpression( + expression=sql.ColumnExpression(col_name), + alias=None if alias is None else sql.AliasExpression(alias=alias), + ) + assert expr.sql() == expected + + +def test_select(): + select_1 = sql.SelectExpression(expression=sql.ColumnExpression("a")) + select_2 = sql.SelectExpression( + expression=sql.ColumnExpression("b"), alias=sql.AliasExpression(alias="bb") + ) + from_1 = sql.FromItem(table_name=sql.TableExpression(table_id="table_a")) + from_2 = sql.FromItem( + query_expr="SELECT * FROM project.table_b", + as_alias=sql.AsAlias(sql.AliasExpression(alias="table_b")), + ) + expr = sql.Select( + select_list=[select_1, select_2], + from_clause_list=[sql.FromClause(from_1), sql.FromClause(from_2)], + ) + expected = "SELECT\n`a`,\n`b` AS `bb`\nFROM\n`table_a`,\n(SELECT * FROM project.table_b) AS `table_b`" + + assert expr.sql() == expected + + +def test_query_expr_w_cte(): + # Test a simple SELECT query. + from_clause1 = sql.FromClause( + sql.FromItem(table_name=sql.TableExpression(table_id="table_a")) + ) + select1 = sql.Select( + select_list=[sql.SelectAll(sql.StarExpression())], + from_clause_list=[from_clause1], + ) + query1 = sql.QueryExpr(select=select1) + query1_sql = "SELECT\n*\nFROM\n`table_a`" + assert query1.sql() == query1_sql + + # Test a query with CTE statements. 
+ cte1 = sql.NonRecursiveCTE(cte_name=sql.CTEExpression("a"), query_expr=query1) + cte2 = sql.NonRecursiveCTE(cte_name=sql.CTEExpression("b"), query_expr=query1) + + cte1_sql = f"`a` AS (\n{query1_sql}\n)" + cte2_sql = f"`b` AS (\n{query1_sql}\n)" + assert cte1.sql() == cte1_sql + assert cte2.sql() == cte2_sql + + with_cte_list = [cte1, cte2] + select2 = sql.Select( + select_list=[ + sql.SelectExpression( + sql.ColumnExpression(parent=cte1.cte_name, name="column_x") + ), + sql.SelectAll(sql.StarExpression(parent=cte2.cte_name)), + ], + from_clause_list=[ + sql.FromClause(sql.FromItem(cte_name=cte1.cte_name)), + sql.FromClause(sql.FromItem(cte_name=cte2.cte_name)), + ], + ) + select2_sql = "SELECT\n`a`.`column_x`,\n`b`.*\nFROM\n`a`,\n`b`" + assert select2.sql() == select2_sql + + query2 = sql.QueryExpr(select=select2, with_cte_list=with_cte_list) + query2_sql = f"WITH {cte1_sql},\n{cte2_sql}\n{select2_sql}" + assert query2.sql() == query2_sql diff --git a/tests/unit/test_dtypes.py b/tests/unit/core/test_dtypes.py similarity index 86% rename from tests/unit/test_dtypes.py rename to tests/unit/core/test_dtypes.py index dabbf11c6c..ae194be83f 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/core/test_dtypes.py @@ -20,6 +20,7 @@ import pyarrow as pa # type: ignore import pytest +import bigframes.core.compile.ibis_types import bigframes.dtypes @@ -67,14 +68,14 @@ ) def test_ibis_dtype_converts(ibis_dtype, bigframes_dtype): """Test all the Ibis data types needed to read BigQuery tables""" - result = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_dtype) + result = bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(ibis_dtype) assert result == bigframes_dtype def test_ibis_timestamp_pst_raises_unexpected_datatype(): """BigQuery timestamp only supports UTC time""" with pytest.raises(ValueError, match="Unexpected Ibis data type"): - bigframes.dtypes.ibis_dtype_to_bigframes_dtype( + bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( ibis_dtypes.Timestamp(timezone="PST") ) @@ -82,7 +83,9 @@ def test_ibis_timestamp_pst_raises_unexpected_datatype(): def test_ibis_float32_raises_unexpected_datatype(): """Other Ibis types not read from BigQuery are not expected""" with pytest.raises(ValueError, match="Unexpected Ibis data type"): - bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_dtypes.float32) + bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( + ibis_dtypes.float32 + ) IBIS_ARROW_DTYPES = ( @@ -139,13 +142,13 @@ def test_ibis_float32_raises_unexpected_datatype(): @pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES) def test_arrow_dtype_to_ibis_dtype(ibis_dtype, arrow_dtype): - result = bigframes.dtypes.arrow_dtype_to_ibis_dtype(arrow_dtype) + result = bigframes.core.compile.ibis_types._arrow_dtype_to_ibis_dtype(arrow_dtype) assert result == ibis_dtype @pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES) def test_ibis_dtype_to_arrow_dtype(ibis_dtype, arrow_dtype): - result = bigframes.dtypes.ibis_dtype_to_arrow_dtype(ibis_dtype) + result = bigframes.core.compile.ibis_types._ibis_dtype_to_arrow_dtype(ibis_dtype) assert result == arrow_dtype @@ -178,7 +181,9 @@ def test_ibis_dtype_to_arrow_dtype(ibis_dtype, arrow_dtype): ) def test_bigframes_dtype_converts(ibis_dtype, bigframes_dtype): """Test all the Ibis data types needed to read BigQuery tables""" - result = bigframes.dtypes.bigframes_dtype_to_ibis_dtype(bigframes_dtype) + result = bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + 
bigframes_dtype + ) assert result == ibis_dtype @@ -203,20 +208,22 @@ def test_bigframes_dtype_converts(ibis_dtype, bigframes_dtype): ) def test_bigframes_string_dtype_converts(ibis_dtype, bigframes_dtype_str): """Test all the Ibis data types needed to read BigQuery tables""" - result = bigframes.dtypes.bigframes_dtype_to_ibis_dtype(bigframes_dtype_str) + result = bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + bigframes_dtype_str + ) assert result == ibis_dtype def test_unsupported_dtype_raises_unexpected_datatype(): """Incompatible dtypes should fail when passed into BigQuery DataFrames""" with pytest.raises(ValueError, match="Unexpected data type"): - bigframes.dtypes.bigframes_dtype_to_ibis_dtype(np.float32) + bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype(np.float32) def test_unsupported_dtype_str_raises_unexpected_datatype(): """Incompatible dtypes should fail when passed into BigQuery DataFrames""" with pytest.raises(ValueError, match="Unexpected data type"): - bigframes.dtypes.bigframes_dtype_to_ibis_dtype("int64") + bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype("int64") @pytest.mark.parametrize( @@ -228,21 +235,23 @@ def test_unsupported_dtype_str_raises_unexpected_datatype(): ], ) def test_literal_to_ibis_scalar_converts(literal, ibis_scalar): - assert bigframes.dtypes.literal_to_ibis_scalar(literal).equals(ibis_scalar) + assert bigframes.core.compile.ibis_types.literal_to_ibis_scalar(literal).equals( + ibis_scalar + ) def test_literal_to_ibis_scalar_throws_on_incompatible_literal(): with pytest.raises( ValueError, ): - bigframes.dtypes.literal_to_ibis_scalar({"mykey": "myval"}) + bigframes.core.compile.ibis_types.literal_to_ibis_scalar({"mykey": "myval"}) def test_remote_function_io_types_are_supported_bigframes_types(): from ibis.expr.datatypes.core import dtype as python_type_to_bigquery_type - from bigframes.dtypes import SUPPORTED_IO_PYTHON_TYPES as rf_supported_io_types + from bigframes.dtypes import RF_SUPPORTED_IO_PYTHON_TYPES as rf_supported_io_types for python_type in rf_supported_io_types: ibis_type = python_type_to_bigquery_type(python_type) - assert ibis_type in bigframes.dtypes.IBIS_TO_BIGFRAMES + assert ibis_type in bigframes.core.compile.ibis_types.IBIS_TO_BIGFRAMES diff --git a/tests/unit/core/test_sql.py b/tests/unit/core/test_sql.py new file mode 100644 index 0000000000..29f1e48a70 --- /dev/null +++ b/tests/unit/core/test_sql.py @@ -0,0 +1,78 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
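The unit tests in tests/unit/core/test_sql.py that follow cover sql.create_vector_search_sql, the helper that renders the VECTOR_SEARCH statement and appears to be the layer that bigframes.bigquery.vector_search() builds on. A minimal sketch of calling it directly, reusing the option names from the tests (the query SQL string and table name are hypothetical):

    from bigframes.core import sql

    query_sql = "SELECT embedding FROM my_embeddings_table WHERE id = 1"  # hypothetical query
    vector_search_sql = sql.create_vector_search_sql(
        query_sql,
        {
            "base_table": "my_base_table",  # hypothetical table
            "column_to_search": "my_embedding_column",
            "distance_type": "COSINE",
            "top_k": 10,
            "use_brute_force": False,
        },
    )
    # vector_search_sql now holds a SELECT ... FROM VECTOR_SEARCH(...) statement
    # of the shape asserted in the tests below.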
+ + +from bigframes.core import sql + + +def test_create_vector_search_sql_simple(): + sql_string = "SELECT embedding FROM my_embeddings_table WHERE id = 1" + options = { + "base_table": "my_base_table", + "column_to_search": "my_embedding_column", + "distance_type": "COSINE", + "top_k": 10, + "use_brute_force": False, + } + + expected_query = f""" + SELECT + query.*, + base.*, + distance, + FROM VECTOR_SEARCH( + TABLE `my_base_table`, + 'my_embedding_column', + ({sql_string}), + distance_type => 'COSINE', + top_k => 10 + ) + """ + + result_query = sql.create_vector_search_sql( + sql_string, options # type:ignore + ) + assert result_query == expected_query + + +def test_create_vector_search_sql_query_column_to_search(): + sql_string = "SELECT embedding FROM my_embeddings_table WHERE id = 1" + options = { + "base_table": "my_base_table", + "column_to_search": "my_embedding_column", + "distance_type": "COSINE", + "top_k": 10, + "query_column_to_search": "new_embedding_column", + "use_brute_force": False, + } + + expected_query = f""" + SELECT + query.*, + base.*, + distance, + FROM VECTOR_SEARCH( + TABLE `my_base_table`, + 'my_embedding_column', + ({sql_string}), + 'new_embedding_column', + distance_type => 'COSINE', + top_k => 10 + ) + """ + + result_query = sql.create_vector_search_sql( + sql_string, options # type:ignore + ) + assert result_query == expected_query diff --git a/tests/unit/functions/test_remote_function_template.py b/tests/unit/functions/test_remote_function_template.py new file mode 100644 index 0000000000..70b033d938 --- /dev/null +++ b/tests/unit/functions/test_remote_function_template.py @@ -0,0 +1,193 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json + +import pandas as pd +import pandas.testing +import pyarrow +import pytest + +import bigframes.dtypes +import bigframes.functions.remote_function_template as remote_function_template + +HELLO_WORLD_BASE64_BYTES = b"SGVsbG8sIFdvcmxkIQ==" +HELLO_WORLD_BASE64_STR = "SGVsbG8sIFdvcmxkIQ==" + + +@pytest.mark.parametrize( + ["type_", "json_value", "expected"], + ( + pytest.param( + # Type names should match those in BigQueryType.from_ibis in + # third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py + "BOOLEAN", + True, + True, + ), + pytest.param( + "BYTES", + HELLO_WORLD_BASE64_STR, + b"Hello, World!", + ), + pytest.param( + "FLOAT64", + 1.25, + 1.25, + ), + pytest.param( + "INT64", + 123, + 123, + ), + pytest.param( + "STRING", + "Hello, World!", + "Hello, World!", + ), + ), +) +def test_convert_from_bq_json(type_, json_value, expected): + got = remote_function_template.convert_from_bq_json(type_, json_value) + assert got == expected + + +@pytest.mark.parametrize( + "type_", + [ + # Type names should match those in BigQueryType.from_ibis in + # third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py + "BOOLEAN", + "BYTES", + "FLOAT64", + "INT64", + "STRING", + ], +) +def test_convert_from_bq_json_none(type_): + got = remote_function_template.convert_from_bq_json(type_, None) + assert got is None + + +@pytest.mark.parametrize( + ["type_", "value", "expected"], + ( + pytest.param( + # Type names should match those in BigQueryType.from_ibis in + # third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py + "BOOLEAN", + True, + True, + ), + pytest.param( + "BYTES", + b"Hello, World!", + HELLO_WORLD_BASE64_STR, + ), + pytest.param( + "FLOAT64", + 1.25, + 1.25, + ), + pytest.param( + "INT64", + 123, + 123, + ), + pytest.param( + "STRING", + "Hello, World!", + "Hello, World!", + ), + ), +) +def test_convert_to_bq_json(type_, value, expected): + got = remote_function_template.convert_to_bq_json(type_, value) + assert got == expected + + +@pytest.mark.parametrize( + "type_", + [ + # Type names should match those in BigQueryType.from_ibis in + # third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py + "BOOLEAN", + "BYTES", + "FLOAT64", + "INT64", + "STRING", + ], +) +def test_convert_to_bq_json_none(type_): + got = remote_function_template.convert_to_bq_json(type_, None) + assert got is None + + +@pytest.mark.parametrize( + ["row_json", "expected"], + ( + pytest.param( + json.dumps( + { + "names": ["'my-index'", "'col1'", "'col2'", "'col3'"], + "types": ["string", "Int64", "Int64", "Int64"], + "values": ["my-index-value", "1", None, "-1"], + "indexlength": 1, + "dtype": "Int64", + } + ), + pd.Series( + [1, pd.NA, -1], + dtype="Int64", + index=["col1", "col2", "col3"], + name="my-index-value", + ), + id="int64-string-index", + ), + pytest.param( + json.dumps( + { + "names": ["'col1'", "'col2'", "'col3'"], + "types": ["binary[pyarrow]", "binary[pyarrow]", "binary[pyarrow]"], + "values": [HELLO_WORLD_BASE64_STR, "dGVzdDI=", "dGVzdDM="], + "indexlength": 0, + "dtype": "binary[pyarrow]", + } + ), + pd.Series( + [b"Hello, World!", b"test2", b"test3"], + dtype=pd.ArrowDtype(pyarrow.binary()), + index=["col1", "col2", "col3"], + name=(), + ), + id="binary-no-index", + ), + ), +) +def test_get_pd_series(row_json, expected): + got = remote_function_template.get_pd_series(row_json) + pandas.testing.assert_series_equal(got, expected) + + +def test_get_pd_series_converter_dtypes(): + """Ensures the string format of the dtype doesn't change from that 
expected by get_pd_series.""" + + # Keep in sync with value_converters in get_pd_series. + # NOTE: Any change here is a red flag that there has been a breaking change + # that will affect deployed axis=1 remote functions. + assert str(bigframes.dtypes.BOOL_DTYPE) == "boolean" + assert str(bigframes.dtypes.BYTES_DTYPE) == "binary[pyarrow]" + assert str(bigframes.dtypes.FLOAT_DTYPE) == "Float64" + assert str(bigframes.dtypes.INT_DTYPE) == "Int64" + assert str(bigframes.dtypes.STRING_DTYPE) == "string" diff --git a/tests/unit/test_pandas.py b/tests/unit/test_pandas.py index 54a7a79d3c..408590d4bb 100644 --- a/tests/unit/test_pandas.py +++ b/tests/unit/test_pandas.py @@ -95,8 +95,7 @@ def test_cut_raises_with_labels(): match="The 'labels' parameter must be either False or None.", ): mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) - # TODO(b/340893280): fix type error - bigframes.pandas.cut(mock_series, 4, labels=["a", "b", "c", "d"]) # type: ignore + bigframes.pandas.cut(mock_series, 4, labels=["a", "b", "c", "d"]) @pytest.mark.parametrize( diff --git a/tests/unit/test_remote_function.py b/tests/unit/test_remote_function.py index 1bd3f3b14f..6868e85b9c 100644 --- a/tests/unit/test_remote_function.py +++ b/tests/unit/test_remote_function.py @@ -19,6 +19,7 @@ import pandas import pytest +import bigframes.core.compile.ibis_types import bigframes.dtypes import bigframes.functions.remote_function import bigframes.series @@ -62,11 +63,11 @@ def axis_1_function(myparam: series_type) -> str: # type: ignore def test_supported_types_correspond(): # The same types should be representable by the supported Python and BigQuery types. ibis_types_from_python = { - ibis_types.dtype(t) for t in bigframes.dtypes.SUPPORTED_IO_PYTHON_TYPES + ibis_types.dtype(t) for t in bigframes.dtypes.RF_SUPPORTED_IO_PYTHON_TYPES } ibis_types_from_bigquery = { third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) - for tk in bigframes.dtypes.SUPPORTED_IO_BIGQUERY_TYPEKINDS + for tk in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS } assert ibis_types_from_python == ibis_types_from_bigquery diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index eb6b9161fc..b0e1a09392 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -1,6 +1,8 @@ # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/indexes/base.py from __future__ import annotations +import typing + from bigframes import constants @@ -320,7 +322,7 @@ def drop(self, labels) -> Index: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def dropna(self, how: str = "any"): + def dropna(self, how: typing.Literal["all", "any"] = "any"): """Return Index without NA/NaN values. 
Args: diff --git a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py index 3d460b2b16..52b287b949 100644 --- a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py +++ b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py @@ -1,14 +1,13 @@ # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/tools/datetimes.py from datetime import datetime -from typing import Iterable, Mapping, Union +from typing import List, Mapping, Tuple, Union import pandas as pd from bigframes import constants, series -local_scalars = Union[int, float, str, datetime] -local_iterables = Union[Iterable, pd.Series, pd.DataFrame, Mapping] +local_iterables = Union[List, Tuple, pd.Series, pd.DataFrame, Mapping] def to_datetime( diff --git a/third_party/bigframes_vendored/sklearn/base.py b/third_party/bigframes_vendored/sklearn/base.py index 1a151a1119..57c9e79f8d 100644 --- a/third_party/bigframes_vendored/sklearn/base.py +++ b/third_party/bigframes_vendored/sklearn/base.py @@ -87,7 +87,7 @@ def score(self, X, y): .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#classification_models for the outputs relevant to this model type. @@ -115,7 +115,7 @@ def score(self, X, y): .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#regression_models for the outputs relevant to this model type. diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index 386b620f4a..aaf43dbcfe 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -103,7 +103,7 @@ def score( .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#k-means_models for the outputs relevant to this model type. diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py index 71e53bf4a9..ae6f0b0561 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py @@ -52,7 +52,7 @@ def score(self, X=None, y=None): .. note:: - Output matches that of the BigQuery ML.EVALUTE function. + Output matches that of the BigQuery ML.EVALUATE function. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#pca_models for the outputs relevant to this model type. 
diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py index 49198eb9bd..c52a37018c 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py @@ -26,8 +26,10 @@ class LogisticRegression(LinearClassifierMixin, BaseEstimator): Args: optimize_strategy (str, default "auto_strategy"): The strategy to train logistic regression models. Possible values are - "auto_strategy", "batch_gradient_descent", "normal_equation". Default - to "auto_strategy". + "auto_strategy" and "batch_gradient_descent". The two are equivalent since + "auto_strategy" falls back to "batch_gradient_descent". The option is kept + for API consistency. + Defaults to "auto_strategy". fit_intercept (default True): Default True. Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.