Performance

Performance comparison

Here, we compare the performance of three runtime configurations: PyTorch and our custom-built tinyRuntime, the latter in both non-quantized and quantized versions, using a ResNet18 model and 100 images from the Imagenette dataset. We focus on four key metrics: accuracy, execution time, peak memory usage, and model size.
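
All plots below read from a single benchmark.csv results file. Its exact layout depends on the benchmarking harness, but the plotting code that follows assumes at least the columns sketched here (the sample row is illustrative, not a real measurement):

import pandas as pd

# Columns consumed by the plotting code below. The sample values are
# made up for illustration; they are not actual benchmark results.
sample = pd.DataFrame([{
    "Datetime": "2024-01-01 12:00:00",  # when the run was recorded
    "Architecture": "x86_64",           # "x86_64" or "arm64"
    "Runtime": "tinyRuntime",           # "PyTorch" or "tinyRuntime"
    "Quantization": True,               # whether the model was quantized
    "Accuracy": 99.0,                   # top-1 accuracy in percent
    "Time": 7.3,                        # execution time in seconds
    "Max memory": 62.1,                 # peak memory usage in MB
    "Model size": 11.2,                 # model size in MB
}])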

AMD64

Code
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display

def split_dataframe(df):
    '''Split dataframe by runtime: PyTorch, tinyRuntime (no quant), and tinyRuntime (quant).'''
    df_pytorch = df[df["Runtime"] == "PyTorch"]
    df_trv = df[(df["Runtime"] == "tinyRuntime") & (df["Quantization"] == False)]
    df_trq = df[(df["Runtime"] == "tinyRuntime") & (df["Quantization"] == True)]
    return df_pytorch, df_trv, df_trq

def plot_perf_comp(df, save_image=False):
    '''Plot latest performance comparisons using Plotly.'''
    dfs = split_dataframe(df)
    
    # Create a 2x2 subplot grid, one panel per metric
    fig = make_subplots(rows=2, cols=2, subplot_titles=("Accuracy", "Time", "Max memory usage", "Model size"))

    metrics = ["Accuracy", "Time", "Max memory", "Model size"]
    colors = ['rgba(31, 119, 180, 0.8)', 'rgba(255, 127, 14, 0.8)', 'rgba(44, 160, 44, 0.8)']
    names = ["PyTorch", "tinyRuntime (no quant)", "tinyRuntime (quant)"]

    for i, metric in enumerate(metrics):
        # Retrieve the latest value of this metric for each runtime
        y_values = [df_runtime[metric].values[-1] for df_runtime in dfs]
        
        # Add trace for each runtime
        for j, name in enumerate(names):
            trace = go.Bar(x=[name], y=[y_values[j]], marker_color=colors[j], showlegend=(i == 0), name=name)
            fig.add_trace(trace, row=i // 2 + 1, col=i % 2 + 1)

    # Set layout, background color and font size, and disable legend click feature
    fig.update_layout(
        legend=dict(orientation="h", yanchor="bottom", y=1.1, xanchor="right", x=1),
        template="plotly_dark", legend_itemclick=False, legend_itemdoubleclick=False, font=dict(size=14),
        margin=dict(t=90, b=60, l=80, r=50))

    # Update axis labels
    fig.update_yaxes(title_text="Accuracy (%)", row=1, col=1)
    fig.update_yaxes(title_text="Time (s)", row=1, col=2)
    fig.update_yaxes(title_text="Max memory usage (MB)", row=2, col=1)
    fig.update_yaxes(title_text="Model size (MB)", row=2, col=2)
    fig.update_xaxes(showticklabels=False)
    # Show the plot with modebar hidden
    fig.show(config={'displayModeBar': False})
    
    if save_image:
        fig.write_image("images/perf_bar.png")

    # Create DataFrame
    data = {
        "Accuracy (%)": [df_runtime["Accuracy"].values[-1] for df_runtime in dfs],
        "Time (s)": [df_runtime["Time"].values[-1] for df_runtime in dfs],
        "Max memory usage (MB)": [df_runtime["Max memory"].values[-1] for df_runtime in dfs],
        "Model size (MB)": [df_runtime["Model size"].values[-1] for df_runtime in dfs]
    }
    df_results = pd.DataFrame(data, index=["PyTorch", "tinyRuntime (no quant)", "tinyRuntime (quant)"])
    display(df_results)

df = pd.read_csv('benchmark.csv')
df_x86 = df[df["Architecture"] == "x86_64"]
plot_perf_comp(df_x86)
Figure 1: Performance comparison on AMD64
Runtime                   Accuracy (%)    Time (s)   Max memory usage (MB)   Model size (MB)
PyTorch                           99.0   11.247210              473.507812         44.938353
tinyRuntime (no quant)            99.0    9.644408               98.714844         44.660263
tinyRuntime (quant)               99.0    7.335336               62.062500         11.197475
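
To put these numbers in perspective, the ratios below are computed directly from the AMD64 table above:

# Headline ratios for AMD64, taken from the results table above.
pytorch_time, quant_time = 11.247210, 7.335336
pytorch_mem,  quant_mem  = 473.507812, 62.062500
pytorch_size, quant_size = 44.938353, 11.197475

print(f"Speedup over PyTorch:  {pytorch_time / quant_time:.2f}x")   # ~1.53x
print(f"Peak memory reduction: {pytorch_mem / quant_mem:.2f}x")     # ~7.63x
print(f"Model size reduction:  {pytorch_size / quant_size:.2f}x")   # ~4.01x
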
Code
def plot_radar(df, save_image=False):
    '''Plot a radar chart of normalized time, memory usage, and accuracy per runtime.'''
    # Normalize each metric to the [0, 1] range
    df = df.copy()
    df["Time"] = df["Time"] / df["Time"].max()
    df["Accuracy"] = df["Accuracy"] / 100
    df["Max memory"] = df["Max memory"] / df["Max memory"].max()
    # Split based on runtime and keep the three relevant columns
    dfs = [df_split[['Time', 'Max memory', 'Accuracy']] for df_split in split_dataframe(df)]
    # Create a radar chart
    categories = ['Time', 'Memory usage', 'Accuracy']
    runtimes = ["PyTorch", "tinyRuntime (no quant)", "tinyRuntime (quant)"]
    fig = go.Figure()
    for runtime, df_runtime in zip(runtimes, dfs):
        fig.add_trace(go.Scatterpolar(
            r=df_runtime.iloc[-1].values,  # latest run for this runtime
            theta=categories,
            fill='toself',
            name=runtime
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1]
            )
        ),
        font=dict(size=16),
        margin=dict(t=50, b=50, l=50, r=50),
        showlegend=True
    )
    fig.add_annotation(
        text="*Values are normalized between 0 and 1",
        xref="paper", yref="paper",
        x=0.5, y=-0.1,
        showarrow=False,
        font=dict(size=15)
    )
    # Show the plot with modebar hidden, matching the other figures
    fig.show(config={'displayModeBar': False})
    
    if save_image:
        fig.write_image("images/perf_radar.png")
    
plot_radar(df_x86)
Figure 2: Radar chart of time, memory, and accuracy for AMD64. Quantities are scaled between 0 and 1.
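
To make the scaling concrete, here is the normalization that plot_radar applies, worked by hand on the AMD64 "Time" column (values taken from the AMD64 results table above):

# Each runtime's time is divided by the slowest (maximum) time.
times = {
    "PyTorch": 11.247210,
    "tinyRuntime (no quant)": 9.644408,
    "tinyRuntime (quant)": 7.335336,
}
max_time = max(times.values())  # 11.247210 (PyTorch)
for runtime, t in times.items():
    print(f"{runtime:<24} {t / max_time:.3f}")
# PyTorch                  1.000
# tinyRuntime (no quant)   0.858
# tinyRuntime (quant)      0.652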

ARM64

Code
df_arm = df[df["Architecture"] == "arm64"]
plot_perf_comp(df_arm, save_image=True)
Figure 3: Performance comparison on ARM64
Runtime                   Accuracy (%)    Time (s)   Max memory usage (MB)   Model size (MB)
PyTorch                           99.0    6.832752              462.390625         44.938353
tinyRuntime (no quant)            99.0    2.672380              143.281250         44.660263
tinyRuntime (quant)               99.0    3.812798               93.515625         11.949406
Code
plot_radar(df_arm, save_image=True)
Figure 4: Radar chart of time, memory, and accuracy for ARM64. Quantities are scaled between 0 and 1.

Runtime optimization progress

Code
def plot_progress_subplot(fig, row, col, df, metric, showlegend):
    '''Plot the evolution of a given metric over time for each runtime on one subplot.'''
    df_pytorch, df_trv, df_trq = split_dataframe(df)

    colors = ['rgba(31, 119, 180, 0.8)', 'rgba(255, 127, 14, 0.8)', 'rgba(44, 160, 44, 0.8)']
    fig.add_trace(go.Scatter(x=df_pytorch["Datetime"], y=df_pytorch[metric], mode='lines+markers', name='PyTorch',
                             line=dict(color=colors[0]), showlegend=showlegend), row=row, col=col)
    fig.add_trace(go.Scatter(x=df_trv["Datetime"], y=df_trv[metric], mode='lines+markers', name='tinyRuntime (no quant)',
                             line=dict(color=colors[1]), showlegend=showlegend), row=row, col=col)
    fig.add_trace(go.Scatter(x=df_trq["Datetime"], y=df_trq[metric], mode='lines+markers', name='tinyRuntime (quant)',
                             line=dict(color=colors[2]), showlegend=showlegend), row=row, col=col)

def plot_progress(metric, ylabel, title, save_image=False):
    '''Plot the history of a metric across benchmark runs for both architectures.'''
    fig = make_subplots(rows=2, cols=1, subplot_titles=('AMD64', 'ARM64'), shared_xaxes=True, vertical_spacing=0.15)
    
    plot_progress_subplot(fig, 1, 1, df_x86, metric, showlegend=True)
    plot_progress_subplot(fig, 2, 1, df_arm, metric, showlegend=False)
    
    fig.update_xaxes(title_text="Datetime", row=2, col=1)
    fig.update_xaxes(showticklabels=False, row=1, col=1)
    fig.update_yaxes(title_text=ylabel, row=1, col=1)
    fig.update_yaxes(title_text=ylabel, row=2, col=1)
    
    fig.update_layout(height=800, showlegend=True,
                      legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
                      legend_itemclick=False, legend_itemdoubleclick=False, font=dict(size=14), margin=dict(t=100, b=100, l=80, r=50),
                      template="plotly_dark", title=f"{title} History")
    fig.show(config={'displayModeBar': False})
    if save_image:
        fig.write_image("images/perf_progress.png")

plot_progress("Time", "Time (s)", "Execution Time", save_image=True)
Figure 5: Execution time history
Code
plot_progress("Max memory", "Max memory usage (MB)", "Memory Usage")
Figure 6: Max memory usage history
Code
plot_progress("Accuracy", "Accuracy (%)", "Accuracy")
Figure 7: Accuracy history