sdpython
diff --git a/‎_doc/api/ort.rst
+2 b/‎_doc/api/ort.rst
+2
diff --git a/‎_doc/api/plotting.rst
+5 b/‎_doc/api/plotting.rst
+5
diff --git a/‎_doc/examples/plot_profiling.py
+50-131 b/‎_doc/examples/plot_profiling.py
+50-131
diff --git a/‎_unittests/ut_ort/data/prof_base.xlsx
30.2 KB b/‎_unittests/ut_ort/data/prof_base.xlsx
30.2 KB
diff --git a/‎_unittests/ut_ort/data/prof_opti.xlsx
38.3 KB b/‎_unittests/ut_ort/data/prof_opti.xlsx
38.3 KB
diff --git a/‎_unittests/ut_ort/test_ort_profile.py
+29-2 b/‎_unittests/ut_ort/test_ort_profile.py
+29-2
@@ -23,5 +23,7 @@ OrtTensor
 Profiling
 +++++++++
 
+.. autofunction:: onnx_array_api.ort.ort_profile.merge_ort_profile
+
 .. autofunction:: onnx_array_api.ort.ort_profile.ort_profile
 
@@ -8,6 +8,11 @@ Dot
 
 .. autofunction:: onnx_array_api.plotting.dot_plot.to_dot
 
+Statistics
+++++++++++
+
+.. autofunction:: onnx_array_api.plotting.stat_plot.plot_ort_profile
+
 Text
 ++++
 
 
@@ -21,17 +21,22 @@
 from onnxruntime import get_available_providers
 from onnx_array_api.ext_test_case import example_path
 from onnx_array_api.ort.ort_optimizers import ort_optimized_model
-from onnx_array_api.ort.ort_profile import ort_profile
+from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile
+from onnx_array_api.plotting.stat_plot import plot_ort_profile
 
 
-filename = example_path("data/small.onnx")
+suffix = ""
+filename = example_path(f"data/small{suffix}.onnx")
 optimized = filename + ".optimized.onnx"
+print(f"model={filename!r}")
 
 if not os.path.exists(optimized):
     ort_optimized_model(filename, output=optimized)
-print(optimized)
+print(f"optimized={optimized!r}")
 
 #############################
+# .. _l-example-ort-profiling:
+#
 # Profiling
 # +++++++++
 
@@ -43,50 +48,31 @@
     disable_optimization=True,
     providers=["CPUExecutionProvider"],
 )
-prof_base.to_excel("prof_base.xlsx", index=False)
+prof_base.to_excel(f"prof_base{suffix}.xlsx", index=False)
 prof_base
 
 #######################################
 # And the optimized model.
 
-prof_opt = ort_profile(
+prof_opti = ort_profile(
     optimized,
     feeds,
     repeat=6,
     disable_optimization=True,
     providers=["CPUExecutionProvider"],
 )
-prof_opt
+prof_opti.to_excel(f"prof_opti{suffix}.xlsx", index=False)
+prof_opti
 
 #######################################
 # And the graph is:
 
-
-def plot_profile(df, ax0, ax1=None, title=None):
-    gr_dur = (
-        df[["dur", "args_op_name"]].groupby("args_op_name").sum().sort_values("dur")
-    )
-    gr_dur.plot.barh(ax=ax0)
-    if title is not None:
-        ax0.set_title(title)
-    if ax1 is not None:
-        gr_n = (
-            df[["dur", "args_op_name"]]
-            .groupby("args_op_name")
-            .count()
-            .sort_values("dur")
-        )
-        gr_n = gr_n.loc[gr_dur.index, :]
-        gr_n.plot.barh(ax=ax1)
-        ax1.set_title("n occurences")
-
-
 unique_op = set(prof_base["args_op_name"])
 fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
-plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
-plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")
-
-fig.savefig("plot_profiling.png")
+plot_ort_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
+plot_ort_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
+fig.tight_layout()
+fig.savefig(f"plot_profiling{suffix}.png")
 
 ##################################################
 # Merging profiles
@@ -96,103 +82,14 @@ def plot_profile(df, ax0, ax1=None, title=None):
 # process the same image and the input and output size are the
 # same at every iteration.
 
-
-def preprocess(df):
-    groupkey = [
-        "args_op_name",
-        "args_output_type_shape",
-        "args_input_type_shape",
-        "args_provider",
-    ]
-
-    def _idx(row):
-        """
-        There may be multiple node with the same
-        input/output types and shapes.
-        This function gives every instance a distinct id.
-        First unique op with same I/O receives the index 0.
-        The counter restart when the session goes to the
-        next image.
-        """
-        if row["cat"] == "Session":
-            occurences[0] = {}
-            return -1
-        assert "idx" not in groupkey
-        vals = [row[k] for k in groupkey]
-        key = tuple(map(str, vals))
-        if key not in occurences[0]:
-            occurences[0][key] = 0
-        else:
-            occurences[0][key] += 1
-        return occurences[0][key]
-
-    df = df.copy()
-    occurences = [{}]
-    df["idx"] = df.apply(_idx, axis=1)
-    df = df[(df["cat"] == "Node") & df["name"].str.contains("kernel_time")]
-    groupkey.append("idx")
-    for c in groupkey:
-        if c != "idx":
-            df[c] = df[c].apply(str)
-    gr = df[groupkey + ["dur"]].groupby(groupkey)
-    return gr.sum()
-
-
-base = preprocess(prof_base)
-opti = preprocess(prof_opt)
-merge = base.merge(
-    opti, how="outer", suffixes=("base", "opti"), left_index=True, right_index=True
-)
-merge = merge.reset_index(drop=False)
-merge.to_excel("plot_profiling_merged.xlsx", index=False)
+merge, gr = merge_ort_profile(prof_base, prof_opti)
+merge.to_excel(f"plot_profiling_merged{suffix}.xlsx", index=False)
 merge
 
-
 #####################################################
-# Aggregation
-
-
-def classify(row):
-    if numpy.isnan(row["duropti"]):
-        return "-"
-    if numpy.isnan(row["durbase"]):
-        return "+"
-    return "="
+# Detailed comparison (durations and counts, baseline vs optimized)
 
-
-keys = {"float": "f"}
-
-
-def process_shape(s):
-    value = eval(s)
-    ns = []
-    for v in value:
-        if len(v) != 1:
-            raise NotImplementedError(f"Unexpected value {v} in {s!r}.")
-        k, v = list(v.items())[0]
-        n = "-".join([keys[k], "x".join(map(str, v))])
-        ns.append(n)
-    return ",".join(ns)
-
-
-def label(row):
-    name = row["args_op_name"]
-    inshape = process_shape(row["args_input_type_shape"])
-    outshape = process_shape(row["args_output_type_shape"])
-    side = row["side"][0]
-    prov = row["args_provider"][:3]
-    idx = row["idx"]
-    return f"[{side}{prov}]{name}({inshape})->{outshape}[{idx}]"
-
-
-df = merge.copy()
-df["side"] = df.apply(classify, axis=1)
-df["label"] = df.apply(label, axis=1)
-gr = (
-    df[["label", "durbase", "duropti", "idx"]]
-    .groupby("label")
-    .agg({"durbase": numpy.sum, "duropti": numpy.sum, "idx": max})
-)
+gr.to_excel(f"plot_profiling_merged_details{suffix}.xlsx")
 gr
 
 ################################
@@ -210,11 +107,10 @@ def label(row):
 gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
 ax[0].set_title("Side by side duration")
 gr = gr.copy()
-gr["idx"] += 1
-gr[["idx"]].plot.barh(ax=ax[1])
+gr[["countbase", "countopti"]].plot.barh(ax=ax[1])
 ax[1].set_title("Side by side count")
 fig.tight_layout()
-fig.savefig("plot_profiling_side_by_side.png")
+fig.savefig(f"plot_profiling_side_by_side{suffix}.png")
 
 
 ########################################
@@ -231,21 +127,44 @@ def label(row):
         disable_optimization=True,
         providers=["CUDAExecutionProvider"],
     )
+    prof_base.to_excel(f"prof_cuda_base{suffix}.xlsx", index=False)
+
     prof_opti = ort_profile(
         optimized,
         feeds,
         repeat=6,
         disable_optimization=True,
-        providers=["CUDAExecutionProvider"],
+        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
     )
+    prof_opti.to_excel(f"prof_cuda_opti{suffix}.xlsx", index=False)
 
     unique_op = set(prof_base["args_op_name"])
     fig, ax = plt.subplots(2, 2, figsize=(10, len(unique_op)), sharex="col")
-    plot_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
-    plot_profile(prof_opt, ax[1, 0], ax[1, 1], title="optimized")
-    fig.savefig("plot_profiling_cuda.png")
+    plot_ort_profile(prof_base, ax[0, 0], ax[0, 1], title="baseline")
+    plot_ort_profile(prof_opti, ax[1, 0], ax[1, 1], title="optimized")
+    fig.tight_layout()
+    fig.savefig(f"plot_profiling_cuda{suffix}.png")
+
+    merge, gr = merge_ort_profile(prof_base, prof_opti)
+    merge.to_excel(f"plot_profiling_merged_cuda{suffix}.xlsx", index=False)
+    gr.to_excel(f"plot_profiling_merged_details_cuda{suffix}.xlsx")
+
+    grmax = gr["durbase"] + gr["duropti"]
+    total = grmax.sum()
+    grmax /= total
+    gr = gr[grmax >= 0.01]
+
+    fig, ax = plt.subplots(1, 2, figsize=(14, min(gr.shape[0], 500)), sharey=True)
+    gr[["durbase", "duropti"]].plot.barh(ax=ax[0])
+    ax[0].set_title("Side by side duration")
+    gr = gr.copy()
+    gr[["countbase", "countopti"]].plot.barh(ax=ax[1])
+    ax[1].set_title("Side by side count")
+    fig.tight_layout()
+    fig.savefig(f"plot_profiling_side_by_side_cuda{suffix}.png")
+
 else:
-    print(f"CUDA not available in {get_available_providers()}")
+    print(f"CUDA not available in {get_available_providers()}.")
     fig, ax = None, None
 
 ax
@@ -1,10 +1,11 @@
 import unittest
+import os
 import numpy as np
-from pandas import DataFrame
+from pandas import DataFrame, read_excel
 from onnx_array_api.npx import absolute, jit_onnx
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.ort.ort_optimizers import ort_optimized_model
-from onnx_array_api.ort.ort_profile import ort_profile
+from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile
 
 
 class TestOrtProfile(ExtTestCase):
@@ -27,10 +28,36 @@ def myloss(x, y):
         self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)
         optimized = ort_optimized_model(onx)
         prof = ort_profile(optimized, feeds)
+        prof.to_csv("prof.csv", index=False)
         self.assertIsInstance(prof, DataFrame)
         prof = ort_profile(optimized, feeds, as_df=False)
         self.assertIsInstance(prof, list)
 
+    def test_merge_ort_profile(self):
+        data = os.path.join(os.path.dirname(__file__), "data")
+        df1 = read_excel(os.path.join(data, "prof_base.xlsx"))
+        df2 = read_excel(os.path.join(data, "prof_opti.xlsx"))
+        merged, gr = merge_ort_profile(df1, df2)
+        self.assertEqual(merged.shape, (23, 9))
+        self.assertEqual(
+            list(merged.columns),
+            [
+                "args_op_name",
+                "args_output_type_shape",
+                "args_input_type_shape",
+                "args_provider",
+                "idx",
+                "durbase",
+                "countbase",
+                "duropti",
+                "countopti",
+            ],
+        )
+        self.assertEqual(gr.shape, (19, 4))
+        self.assertEqual(
+            list(gr.columns), ["durbase", "duropti", "countbase", "countopti"]
+        )
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)