Spaces:

ArneBinder
/

ScientificArgumentRecommender

Running

App Files Files Community

ScientificArgumentRecommender / src /evaluate_documents.py

ArneBinder

https://github.com/ArneBinder/pie-document-level/pull/312

3133b5e verified 6 months ago

raw

history blame

3.99 kB

	import pyrootutils

	# Resolve the project root starting from this file, using ".project-root" as the
	# indicator. The root is also put on the PYTHONPATH and ".env" variables are loaded.
	root = pyrootutils.setup_root(
	    search_from=__file__,
	    dotenv=True,
	    pythonpath=True,
	    indicator=[".project-root"],
	)

	# ------------------------------------------------------------------------------------ #
	# `pyrootutils.setup_root(...)` is an optional line at the top of each entry file
	# that helps to make the environment more robust and convenient
	#
	# the main advantages are:
	# - allows you to keep all entry files in "src/" without installing project as a package
	# - makes paths and scripts always work, no matter what your current working directory is
	# - automatically loads environment variables from the ".env" file if it exists
	#
	# how it works:
	# - the line above recursively searches for the ".project-root" indicator in the present
	# and parent dirs, to determine the project root dir
	# - adds root dir to the PYTHONPATH (if `pythonpath=True`), so this file can be run from
	# any place without installing project as a package
	# - sets PROJECT_ROOT environment variable which is used in "configs/paths/default.yaml"
	# to make all paths always relative to the project root
	# - loads environment variables from ".env" file in root dir (if `dotenv=True`)
	#
	# you can remove `pyrootutils.setup_root(...)` if you:
	# 1. either install project as a package or move each entry file to the project root dir
	# 2. simply remove PROJECT_ROOT variable from paths in "configs/paths/default.yaml"
	# 3. always run entry files from the project root dir
	#
	# https://github.com/ashleve/pyrootutils
	# ------------------------------------------------------------------------------------ #

	from typing import Any, Tuple

	import hydra
	import pytorch_lightning as pl
	from omegaconf import DictConfig
	from pie_datasets import DatasetDict
	from pytorch_ie.core import DocumentMetric
	from pytorch_ie.metrics import * # noqa: F403

	from src import utils
	from src.metrics import * # noqa: F403

	# Module-level logger for this entry script, created via the project's `utils.get_pylogger`.
	log = utils.get_pylogger(__name__)


	@utils.task_wrapper
	def evaluate_documents(cfg: DictConfig) -> Tuple[dict, dict]:
	    """Evaluate serialized PIE documents with a configured document metric.

	    Instantiates the dataset and the metric from the Hydra config, lets the metric
	    convert the dataset, optionally restricts the evaluation to the splits listed in
	    ``cfg.splits`` and finally calls the metric on the resulting documents.

	    This function is wrapped in the ``@utils.task_wrapper`` decorator, which applies
	    extra utilities before and after the call.

	    Args:
	        cfg (DictConfig): Configuration composed by Hydra. ``cfg.dataset`` and
	            ``cfg.metric`` are instantiated via ``hydra.utils.instantiate``; ``seed``
	            and ``splits`` are optional and read via ``cfg.get``; loggers are created
	            with ``utils.instantiate_dict_entries(cfg, "logger")``.

	    Returns:
	        Tuple[dict, dict]: The result of calling the metric on the documents, and a
	            dict with all instantiated objects (keys ``"cfg"``, ``"dataset"``,
	            ``"metric"`` and ``"logger"``).
	    """

	    # Set seed for random number generators in pytorch, numpy and python.random.
	    # Compare against None explicitly: 0 is a valid seed and must not be skipped.
	    seed = cfg.get("seed")
	    if seed is not None:
	        pl.seed_everything(seed, workers=True)

	    # Init pytorch-ie dataset
	    log.info(f"Instantiating dataset <{cfg.dataset._target_}>")
	    dataset: DatasetDict = hydra.utils.instantiate(cfg.dataset, _convert_="partial")

	    # Init document metric
	    log.info(f"Instantiating metric <{cfg.metric._target_}>")
	    metric: DocumentMetric = hydra.utils.instantiate(cfg.metric, _convert_="partial")

	    # auto-convert the dataset if the metric specifies a document type
	    dataset = metric.convert_dataset(dataset)

	    # Init lightning loggers
	    loggers = utils.instantiate_dict_entries(cfg, "logger")

	    object_dict = {
	        "cfg": cfg,
	        "dataset": dataset,
	        "metric": metric,
	        "logger": loggers,
	    }

	    if loggers:
	        log.info("Logging hyperparameters!")
	        # send hparams to all loggers
	        for logger in loggers:
	            logger.log_hyperparams(cfg)

	    # Evaluate on all splits by default, or only on the ones listed in `cfg.splits`.
	    splits = cfg.get("splits", None)
	    if splits is None:
	        documents = dataset
	    else:
	        # Rebuild a container of the same type so the metric receives the same kind
	        # of object as in the unfiltered case.
	        documents = type(dataset)({k: v for k, v in dataset.items() if k in splits})

	    metric_dict = metric(documents)

	    return metric_dict, object_dict


	@hydra.main(
	    version_base="1.2",
	    config_path=str(root / "configs"),
	    config_name="evaluate_documents.yaml",
	)
	def main(cfg: DictConfig) -> Any:
	    """Hydra entry point: run the document evaluation and return only the metric result.

	    Args:
	        cfg (DictConfig): Configuration composed by Hydra from "evaluate_documents.yaml".

	    Returns:
	        Any: The first element of the tuple returned by ``evaluate_documents``.
	    """
	    metrics, _instantiated = evaluate_documents(cfg)
	    return metrics


	if __name__ == "__main__":
	    # Both project helpers run before `main()`, i.e. before Hydra composes the config.
	    # NOTE(review): presumably this rewrites command-line arguments that refer to
	    # files - confirm against `src/utils`.
	    utils.replace_sys_args_with_values_from_files()
	    # NOTE(review): presumably prepares OmegaConf (e.g. custom resolvers) - confirm
	    # against `src/utils`.
	    utils.prepare_omegaconf()
	    main()