File size: 3,989 Bytes
3133b5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import pyrootutils
# Resolve the project root (the directory marked by a ".project-root" file), searching
# from this file's location. With the flags below the root is also added to PYTHONPATH
# and the ".env" file is loaded; `root` is reused further down to build the Hydra
# config path.
root = pyrootutils.setup_root(
search_from=__file__,
indicator=[".project-root"],
pythonpath=True,
dotenv=True,
)
# ------------------------------------------------------------------------------------ #
# `pyrootutils.setup_root(...)` is an optional line at the top of each entry file
# that helps to make the environment more robust and convenient
#
# the main advantages are:
# - allows you to keep all entry files in "src/" without installing project as a package
# - makes paths and scripts always work, no matter what your current working directory is
# - automatically loads environment variables from ".env" file if exists
#
# how it works:
# - the line above recursively searches for a ".project-root" file in present
# and parent dirs, to determine the project root dir
# - adds root dir to the PYTHONPATH (if `pythonpath=True`), so this file can be run from
# any place without installing project as a package
# - sets PROJECT_ROOT environment variable which is used in "configs/paths/default.yaml"
# to make all paths always relative to the project root
# - loads environment variables from ".env" file in root dir (if `dotenv=True`)
#
# you can remove `pyrootutils.setup_root(...)` if you:
# 1. either install project as a package or move each entry file to the project root dir
# 2. simply remove PROJECT_ROOT variable from paths in "configs/paths/default.yaml"
# 3. always run entry files from the project root dir
#
# https://github.com/ashleve/pyrootutils
# ------------------------------------------------------------------------------------ #
from typing import Any, Tuple
import hydra
import pytorch_lightning as pl
from omegaconf import DictConfig
from pie_datasets import DatasetDict
from pytorch_ie.core import DocumentMetric
from pytorch_ie.metrics import * # noqa: F403
from src import utils
from src.metrics import * # noqa: F403
# Module-level logger for this script, obtained from the project's `utils.get_pylogger` helper.
log = utils.get_pylogger(__name__)
@utils.task_wrapper
def evaluate_documents(cfg: DictConfig) -> Tuple[dict, dict]:
    """Evaluates serialized PIE documents.

    This method is wrapped in optional @task_wrapper decorator which applies extra utilities
    before and after the call.

    The function performs these steps in order:
      1. seed the random number generators if ``cfg.seed`` is set (``0`` is a valid seed),
      2. instantiate ``cfg.dataset`` and ``cfg.metric`` via Hydra,
      3. let the metric convert the dataset (``metric.convert_dataset``),
      4. instantiate the loggers and send them the hyperparameters,
      5. optionally restrict the dataset to the splits named in ``cfg.splits``,
      6. call the metric on the resulting documents.

    Args:
        cfg (DictConfig): Configuration composed by Hydra. ``dataset`` and ``metric`` are
            required; ``seed`` and ``splits`` are optional. ``splits`` may be a single
            split name or a list of split names.

    Returns:
        Tuple[dict, dict]: Dict with metrics and dict with all instantiated objects.
    """
    # Set seed for random number generators in pytorch, numpy and python.random.
    # Compare against None instead of relying on truthiness, otherwise `seed: 0`
    # would be silently ignored.
    seed = cfg.get("seed")
    if seed is not None:
        pl.seed_everything(seed, workers=True)

    # Init pytorch-ie dataset
    log.info(f"Instantiating dataset <{cfg.dataset._target_}>")
    dataset: DatasetDict = hydra.utils.instantiate(cfg.dataset, _convert_="partial")

    # Init pytorch-ie document metric
    log.info(f"Instantiating metric <{cfg.metric._target_}>")
    metric: DocumentMetric = hydra.utils.instantiate(cfg.metric, _convert_="partial")

    # auto-convert the dataset if the metric specifies a document type
    dataset = metric.convert_dataset(dataset)

    # Init lightning loggers
    loggers = utils.instantiate_dict_entries(cfg, "logger")

    object_dict = {
        "cfg": cfg,
        "dataset": dataset,
        "metric": metric,
        "logger": loggers,
    }

    if loggers:
        log.info("Logging hyperparameters!")
        # send hparams to all loggers
        for logger in loggers:
            logger.log_hyperparams(cfg)

    splits = cfg.get("splits", None)
    if splits is None:
        # No restriction requested: evaluate on every split of the dataset.
        documents = dataset
    else:
        # A bare string would turn `k in splits` into a substring test, so treat it
        # as a single split name.
        if isinstance(splits, str):
            splits = [splits]
        # Requested splits that do not exist are still skipped (as before), but the
        # user is now told about it instead of getting a silently smaller evaluation.
        missing = [name for name in splits if name not in dataset]
        if missing:
            log.warning(f"Requested splits not found in the dataset and ignored: {missing}")
        # Rebuild a container of the same type holding only the selected splits.
        documents = type(dataset)({k: v for k, v in dataset.items() if k in splits})

    metric_dict = metric(documents)

    return metric_dict, object_dict
@hydra.main(
    version_base="1.2",
    config_path=str(root / "configs"),
    config_name="evaluate_documents.yaml",
)
def main(cfg: DictConfig) -> Any:
    """Hydra entry point: run the document evaluation and return its metrics.

    Args:
        cfg (DictConfig): Configuration composed by Hydra from
            "configs/evaluate_documents.yaml" under the project root.

    Returns:
        Any: The metrics part of the ``evaluate_documents`` result; the dict of
        instantiated objects is discarded.
    """
    metrics, _objects = evaluate_documents(cfg)
    return metrics
if __name__ == "__main__":
    # Script entry: the two project helpers run first, then the Hydra-decorated
    # entry point. The order is kept as-is.
    # NOTE(review): judging by its name, this rewrites sys.argv from values stored
    # in files, so it has to happen before Hydra parses the command line - confirm
    # in src/utils.
    utils.replace_sys_args_with_values_from_files()
    # NOTE(review): presumably OmegaConf setup (e.g. resolvers) needed by the
    # configs - confirm in src/utils.
    utils.prepare_omegaconf()
    main()
|