Konstantin Chernyshev
chore: add about section
79ede97
# Leaderboard title, expressed as a centered HTML <h1> heading.
TITLE = '<h1 align="center" id="space-title">U-MATH / μ-MATH leaderboard</h1>'
# Short introduction: what the U-MATH and µ-MATH datasets evaluate and how
# large they are. The text begins and ends with a newline.
INTRODUCTION_TEXT = (
    "\n"
    "These datasets are designed to test the mathematical reasoning and meta-evaluation capabilities of Large Language Models (LLMs) on university-level problems.\n"
    "U-MATH provides a set of 1,100 university-level mathematical problems, while µ-MATH complements it with a meta-evaluation framework focusing on solution judgment with 1084 LLM solutions.\n"
)
# Markdown "about" text: what the benchmarks are, where to find the datasets,
# paper and evaluation code, and the licensing terms.
#
# This must be a raw string: the text contains the LaTeX fragment `$\mu$`, and
# in a regular string literal `\m` is an invalid escape sequence (a
# DeprecationWarning/SyntaxWarning today, slated to become an error). The raw
# prefix keeps the backslash verbatim, so the resulting value is unchanged.
LLM_BENCHMARKS_TEXT = r"""
This repository contains the official leaderboard code for the U-MATH and $\mu$-MATH benchmarks. These datasets are designed to test the mathematical reasoning and meta-evaluation capabilities of Large Language Models (LLMs) on university-level problems.
### Overview
U-MATH provides a set of 1,100 university-level mathematical problems, while µ-MATH complements it with a meta-evaluation framework focusing on solution judgment with 1084 LLM solutions.
* 📊 [U-MATH benchmark at Huggingface](https://huggingface.co/datasets/toloka/umath)
* 🔎 [μ-MATH benchmark at Huggingface](https://huggingface.co/datasets/toloka/mumath)
* 🗞️ [Paper](https://arxiv.org/abs/2412.03205)
* 👾 [Evaluation Code at GitHub](https://github.com/Toloka/u-math/)
### Licensing Information
* The contents of the μ-MATH's machine-generated `model_output` column are subject to the underlying LLMs' licensing terms.
* Contents of all the other dataset U-MATH and μ-MATH fields, as well as the code, are available under the MIT license.
"""
# BibTeX entry for the U-MATH paper (arXiv:2412.03205), assembled line by line.
# There is no leading or trailing newline in the resulting text.
CITATION_TEXT = "\n".join(
    (
        "@misc{chernyshev2024umath,",
        "title={U-MATH: A University-Level Benchmark for Evaluating Mathematical Skills in LLMs},",
        "author={Konstantin Chernyshev and Vitaliy Polshkov and Ekaterina Artemova and Alex Myasnikov and Vlad Stepanov and Alexei Miasnikov and Sergei Tilga},",
        "year={2024},",
        "eprint={2412.03205},",
        "archivePrefix={arXiv},",
        "primaryClass={cs.CL},",
        "url={https://arxiv.org/abs/2412.03205},",
        "}",
    )
)