core_leaderboard

Running

File size: 13,028 Bytes

import json
import plotly.express as px
from utils.pareto import Agent, compute_pareto_frontier
import plotly.graph_objects as go
import textwrap

def create_task_success_heatmap(df, benchmark_name):
    """Build an agent-by-task heatmap of task outcomes.

    Rows are agents ordered by mean ``Success`` (best first); columns are
    tasks ordered by mean ``Success`` (most often solved first).

    Args:
        df: DataFrame with ``'Agent Name'``, ``'Task ID'`` and ``'Success'``
            columns, one row per (agent, task) pair.
        benchmark_name: Currently unused; kept so existing callers keep
            working.

    Returns:
        A ``plotly.graph_objects.Figure`` containing a single heatmap trace.

    Raises:
        ValueError: Propagated from ``DataFrame.pivot`` when the same
            (agent, task) pair occurs more than once.
    """
    # Mean success per agent / per task drives the row and column ordering.
    agent_accuracy = df.groupby('Agent Name')['Success'].mean().sort_values(ascending=False)
    task_success_rate = df.groupby('Task ID')['Success'].mean().sort_values(ascending=False)

    # Agents x tasks matrix, reordered to match the rankings above.
    pivot_df = df.pivot(index='Agent Name', columns='Task ID', values='Success')
    pivot_df = pivot_df.reindex(index=agent_accuracy.index, columns=task_success_rate.index)

    num_agents = len(pivot_df.index)
    row_height = 30  # Fixed height for each row in pixels
    # Reserve at least one row: Plotly rejects a layout height below 10 px,
    # so an empty frame (0 agents -> height 0) would otherwise raise.
    total_height = max(num_agents, 1) * row_height

    fig = go.Figure(data=go.Heatmap(
        z=pivot_df.values,
        y=pivot_df.index,
        x=pivot_df.columns,
        colorscale=[[0, 'white'], [1, '#3498db']],  # White for failed, blue for success
        showscale=False,
        hovertemplate='<b>Agent:</b> %{y}<br>' +
                      '<b>Task:</b> %{x}<br>' +
                      '<b>Status:</b> %{z}<extra></extra>'
    ))

    fig.update_layout(
        xaxis_title='Task ID',
        height=total_height,
        # width=1150,
        yaxis=dict(
            autorange='reversed',  # First (best) agent at the top
            showticklabels=True,  # Show y-axis tick labels (agent names)
            showline=True,
            linecolor='black',
            showgrid=False
        ),
        xaxis=dict(
            side='top',
            showticklabels=False,  # Hide x-axis tick labels (task IDs)
            showline=True,
            linecolor='black',
            showgrid=False
        ),
        plot_bgcolor='white',
        paper_bgcolor='white',
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        # Pan-only interaction: add the pan button, drop zoom/select tools.
        modebar=dict(
            activecolor='#1f77b4',
            orientation='h',
            bgcolor='rgba(255,255,255,0.8)',
            color='#777',
            add=['pan2d'],
            remove=[
                'zoom2d', 'zoomIn2d', 'zoomOut2d', 'resetScale2d',
                'hoverClosestCartesian', 'hoverCompareCartesian',
                'toggleSpikelines', 'lasso2d', 'lasso', 'select2d', 'select'
            ]
        ),
        dragmode='pan'
    )

    return fig

def create_bar_chart(categories, values, x_label, y_label, title):
    """Build a horizontal bar chart of per-category counts, largest first.

    Each bar is annotated with its share of the sum of ``values``; the hover
    text additionally shows the raw count.

    Args:
        categories: Category labels, parallel to ``values``.
        values: Numeric count for each category.
        x_label: Currently unused; kept for interface compatibility.
        y_label: Currently unused; kept for interface compatibility.
        title: Currently unused; kept for interface compatibility.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar trace. Empty input
        yields an empty chart instead of raising.
    """
    # Rank by value, descending. Building the two lists with comprehensions
    # (rather than ``zip(*ranked)``) also works when there is nothing to plot.
    ranked = sorted(zip(categories, values), key=lambda pair: pair[1], reverse=True)
    categories = [category for category, _ in ranked]
    values = [value for _, value in ranked]

    # Share of each category in the overall total; an all-zero total maps to
    # 0% rather than dividing by zero.
    total_tasks = sum(values)
    shares = [value / total_tasks if total_tasks else 0.0 for value in values]

    text_labels = [f"({share:.1%} of failures)" for share in shares]
    hover_labels = [
        f'{value} tasks ({share:.1%} of failures)'
        for value, share in zip(values, shares)
    ]

    fig = go.Figure(data=[go.Bar(
        y=categories,
        x=values,
        orientation='h',
        marker_color='#3498db',  # Same color as the scatter plot
        text=text_labels,
        textposition='auto',
        customdata=hover_labels,
        # NOTE(review): weight=2 is a very light numeric font weight; confirm
        # whether a bold weight was intended here.
        textfont=dict(color='black', size=14, family='Arial', weight=2),
        hovertemplate='<b>%{y}</b><br>' +
                      'Affected Tasks: %{customdata}<extra></extra>'
    )])

    fig.update_layout(
        height=600,
        xaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False
        ),
        yaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False,
            autorange="reversed"  # This will put the category with the highest value at the top
        ),
        plot_bgcolor='white',
        paper_bgcolor='white',
        bargap=0.2,
        bargroupgap=0.1,
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        # Pan-only interaction: add the pan button, drop zoom/select tools.
        modebar=dict(
            activecolor='#1f77b4',
            orientation='h',
            bgcolor='rgba(255,255,255,0.8)',
            color='#777',
            add=['pan2d'],
            remove=[
                'zoom2d', 'zoomIn2d', 'zoomOut2d', 'resetScale2d',
                'hoverClosestCartesian', 'hoverCompareCartesian',
                'toggleSpikelines', 'lasso2d', 'lasso', 'select2d', 'select'
            ]
        ),
        dragmode='pan'
    )

    return fig

def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
    """Scatter-plot ``df[y]`` against ``df[x]`` with a Pareto frontier overlay.

    Args:
        df: DataFrame holding one row per agent.
        x: Name of the column plotted on the x-axis (passed to
            ``compute_pareto_frontier`` as each agent's cost).
        y: Name of the column plotted on the y-axis (passed as each agent's
            accuracy).
        x_label: Optional x-axis title.
        y_label: Optional y-axis title.
        hover_data: Columns forwarded as ``custom_data``; the hover template
            displays the first one as the agent name.

    Returns:
        A ``plotly.graph_objects.Figure`` with the scatter trace and a dashed
        "Pareto Frontier" line trace.
    """
    # Build the frontier from the same columns that are plotted so the line
    # always lives in the scatter's coordinate system.
    agents = [Agent(cost, score) for cost, score in zip(df[x], df[y])]
    pareto_frontier = compute_pareto_frontier(agents)

    fig = px.scatter(df, x=x, y=y, custom_data=hover_data)
    fig.update_traces(
        hovertemplate="<br>".join([
            "<b>Agent</b>: %{customdata[0]}",
            "<b>Total Cost</b>: $%{x:.1f}",
            "<b>Accuracy</b>: %{y:.1%}",
        ]),
        marker=dict(size=10, color='#3498db'),
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
    )

    # Sort the frontier points by x-coordinate so the line is drawn left to right.
    pareto_points = sorted(
        ((agent.total_cost, agent.accuracy) for agent in pareto_frontier),
        key=lambda point: point[0],
    )

    fig.add_trace(go.Scatter(
        x=[point[0] for point in pareto_points],
        y=[point[1] for point in pareto_points],
        mode='lines',
        name='Pareto Frontier',
        line=dict(color='black', width=1, dash='dash')
    ))

    # Both axes start at zero.
    fig.update_yaxes(rangemode="tozero")
    fig.update_xaxes(rangemode="tozero")

    fig.update_layout(
        # width=1150,
        height=600,
        xaxis_title=x_label,
        yaxis_title=y_label,
        xaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False
        ),
        yaxis=dict(
            showline=True,
            showgrid=False,
            linecolor='black'
        ),
        plot_bgcolor='white',
        # Legend anchored to the bottom-right corner of the plot area
        legend=dict(
            yanchor="bottom",
            y=0.01,
            xanchor="right",
            x=0.98,
            bgcolor="rgba(255, 255, 255, 0.5)"  # semi-transparent white background
        ),
        modebar=dict(
            activecolor='#1f77b4',  # Color of active tool
            orientation='h',  # Horizontal orientation
            bgcolor='rgba(255,255,255,0.8)',  # Slightly transparent white background
            color='#777',  # Color of inactive tools
            add=['pan2d'],
            remove=[
                'zoom2d', 'zoomIn2d', 'zoomOut2d', 'resetScale2d',
                'hoverClosestCartesian', 'hoverCompareCartesian',
                'toggleSpikelines', 'lasso2d', 'lasso', 'select2d', 'select'
            ]
        ),
        dragmode='pan'
    )
    return fig


import plotly.graph_objects as go
import textwrap

def _parse_step_analysis(step):
    """Return ``step['analysis']`` as a dict, or ``{}`` when it is unusable."""
    analysis = step['analysis']
    if isinstance(analysis, str):
        try:
            analysis = json.loads(analysis)
        except json.JSONDecodeError:
            return {}
    # Valid JSON is not necessarily an object (e.g. "null" or a list).
    return analysis if isinstance(analysis, dict) else {}


def _analysis_text(analysis, key, default):
    """Return ``analysis[key]`` as a string, or ``default`` if missing/None."""
    value = analysis.get(key)
    return default if value is None else str(value)


def create_flow_chart(steps):
    """Render a left-to-right chain of step nodes joined by edges.

    Each step becomes a marker at ``x = index, y = 0`` labelled
    ``"Step N"`` plus, when available, its wrapped headline. Hovering a node
    shows the headline and the wrapped description. All nodes share one
    marker color.

    Args:
        steps: Sequence of mappings, each with an ``'analysis'`` entry that
            is either a dict or a JSON string. The keys read from it are
            ``'description'`` and ``'headline'``; unparseable or non-object
            analyses fall back to default texts.

    Returns:
        A ``plotly.graph_objects.Figure`` with an edge trace and a node trace.
    """
    node_x = []
    node_text = []
    hover_text = []
    edge_x = []
    edge_y = []

    for i, step in enumerate(steps):
        node_x.append(i)

        analysis = _parse_step_analysis(step)
        description = _analysis_text(analysis, 'description', 'No description available.')
        step_headline = _analysis_text(analysis, 'headline', '')

        # Wrap text to improve readability
        wrapped_description = '<br>'.join(textwrap.wrap(description, width=90, max_lines=20))

        # The headline is only shown when its shortened (<= 50 chars) form is
        # non-empty; the node label then uses the full headline wrapped at 30.
        short_headline = textwrap.shorten(step_headline, width=50, placeholder='')
        if short_headline:
            hover_title = f": {short_headline}"
            label_suffix = ":<br>" + '<br>'.join(textwrap.wrap(step_headline, width=30))
        else:
            hover_title = ''
            label_suffix = ''

        node_text.append(f"Step {i+1}{label_suffix}")
        hover_text.append(
            f"<b>Step {i+1}{hover_title}</b><br><br>"
            f"<b>Description:</b><br>"
            f"{wrapped_description}<br><br>"
        )

        # Connect this node to its predecessor; None ends the line segment.
        if i > 0:
            edge_x.extend([i-1, i, None])
            edge_y.extend([0, 0, None])

    node_y = [0] * len(node_x)  # All nodes sit on one horizontal line

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="top center",
        showlegend=False,
        hovertext=hover_text,
        hoverinfo='text',
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        marker=dict(
            color='#3498db',
            size=30,
            line_width=2,
        ))

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=2, color='#888'),
        hoverinfo='none',
        showlegend=False,
        mode='lines')

    layout = go.Layout(
        showlegend=True,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',
        paper_bgcolor='white',
        modebar=dict(
            activecolor='#1f77b4',  # Color of active tool
            orientation='h',  # Horizontal orientation
            bgcolor='rgba(255,255,255,0.8)',  # Slightly transparent white background
            color='#777',  # Color of inactive tools
            # Pan-only interaction: add the pan button, drop zoom/select tools.
            add=['pan2d'],
            remove=[
                'zoom2d', 'zoomIn2d', 'zoomOut2d', 'resetScale2d',
                'hoverClosestCartesian', 'hoverCompareCartesian',
                'toggleSpikelines', 'lasso2d', 'lasso', 'select2d', 'select',
            ],
        ),
        # Horizontal legend just above the plot area, right-aligned.
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            bgcolor='rgba(255,255,255,0.8)',  # Slightly transparent white
            bordercolor='rgba(0,0,0,0.1)',  # Light border
            borderwidth=1
        ),
        dragmode='pan',
    )

    # Edges first so the node markers are drawn on top of the lines.
    return go.Figure(data=[edge_trace, node_trace], layout=layout)