Spaces:
Running
Running
{ | |
"perception_temporal_action_loc": { | |
"MLAB (claude-3-5-sonnet-v2)": 2.2, | |
"Top Human in Competition": 284.6, | |
"MLAB (gemini-exp-1206)": -1.3, | |
"MLAB (o3-mini)": 0.9, | |
"MLAB (gpt-4o)": 0.9, | |
"MLAB (llama3-1-405b-instruct)": 1.5, | |
"CoI-Agent (o1) + MLAB (gpt-4o)": 1.0, | |
"Human Idea + MLAB (gpt-4o)": 1.5 | |
}, | |
"llm-merging": { | |
"CoI-Agent (o1) + MLAB (gpt-4o)": -0.7, | |
"Top Human in Competition": 68.2, | |
"MLAB (claude-3-5-sonnet-v2)": 3.4, | |
"MLAB (gemini-exp-1206)": 3.4, | |
"MLAB (o3-mini)": -0.7, | |
"MLAB (gpt-4o)": 1.4, | |
"MLAB (llama3-1-405b-instruct)": -0.7, | |
"Human Idea + MLAB (gpt-4o)": -0.7 | |
}, | |
"product-recommendation": { | |
"MLAB (claude-3-5-sonnet-v2)": 12.3, | |
"Top Human in Competition": 412.6, | |
"MLAB (gemini-exp-1206)": 0.6, | |
"MLAB (o3-mini)": 0.6, | |
"MLAB (gpt-4o)": 2.6, | |
"MLAB (llama3-1-405b-instruct)": -0.0, | |
"Human Idea + MLAB (gpt-4o)": 8.9, | |
"CoI-Agent (o1) + MLAB (gpt-4o)": 0.6 | |
}, | |
"weather_forcast": { | |
"CoI-Agent (o1) + MLAB (gpt-4o)": 83.6, | |
"Top Human in Competition": 212.0, | |
"Human Idea + MLAB (gpt-4o)": 26.1, | |
"MLAB (claude-3-5-sonnet-v2)": 31.0, | |
"MLAB (gemini-exp-1206)": 91.4, | |
"MLAB (o3-mini)": 53.3, | |
"MLAB (gpt-4o)": 100.8, | |
"MLAB (llama3-1-405b-instruct)": 66.7 | |
}, | |
"meta-learning": { | |
"MLAB (claude-3-5-sonnet-v2)": -14.9, | |
"Top Human in Competition": 304.5, | |
"MLAB (gemini-exp-1206)": -3.2, | |
"MLAB (o3-mini)": -14.9, | |
"MLAB (gpt-4o)": -14.9, | |
"MLAB (llama3-1-405b-instruct)": -14.9, | |
"Human Idea + MLAB (gpt-4o)": -14.9, | |
"CoI-Agent (o1) + MLAB (gpt-4o)": -14.9 | |
}, | |
"machine_unlearning": { | |
"Human Idea + MLAB (gpt-4o)": 4.2, | |
"Top Human in Competition": 61.9, | |
"CoI-Agent (o1) + MLAB (gpt-4o)": 7.3, | |
"MLAB (claude-3-5-sonnet-v2)": -58.6, | |
"MLAB (gemini-exp-1206)": 3.5, | |
"MLAB (o3-mini)": 2.2, | |
"MLAB (gpt-4o)": -11.1, | |
"MLAB (llama3-1-405b-instruct)": 3.8 | |
}, | |
"backdoor-trigger-recovery": { | |
"MLAB (gpt-4o)": 74.0, | |
"Top Human in Competition": 621.3, | |
"CoI-Agent (o1) + MLAB (gpt-4o)": 24.9, | |
"MLAB (claude-3-5-sonnet-v2)": 247.9, | |
"MLAB (gemini-exp-1206)": 80.4, | |
"MLAB (o3-mini)": 38.8, | |
"MLAB (llama3-1-405b-instruct)": 71.7, | |
"Human Idea + MLAB (gpt-4o)": 54.5 | |
} | |
} |