File size: 3,989 Bytes
3133b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import pyrootutils

# Resolve the project root, searching from this file for a ".project-root" indicator.
# The returned path is used further down to build the Hydra config path.
root = pyrootutils.setup_root(
    search_from=__file__,
    indicator=[".project-root"],
    pythonpath=True,
    dotenv=True,
)

# ------------------------------------------------------------------------------------ #
# `pyrootutils.setup_root(...)` is an optional line at the top of each entry file
# that helps to make the environment more robust and convenient
#
# the main advantages are:
# - allows you to keep all entry files in "src/" without installing project as a package
# - makes paths and scripts always work no matter what your current working dir is
# - automatically loads environment variables from ".env" file if exists
#
# how it works:
# - the call above searches for the ".project-root" indicator in the present
#   and parent dirs, to determine the project root dir
# - adds root dir to the PYTHONPATH (if `pythonpath=True`), so this file can be run from
#   any place without installing project as a package
# - sets PROJECT_ROOT environment variable which is used in "configs/paths/default.yaml"
#   to make all paths always relative to the project root
# - loads environment variables from ".env" file in root dir (if `dotenv=True`)
#
# you can remove `pyrootutils.setup_root(...)` if you:
# 1. either install project as a package or move each entry file to the project root dir
# 2. simply remove PROJECT_ROOT variable from paths in "configs/paths/default.yaml"
# 3. always run entry files from the project root dir
#
# https://github.com/ashleve/pyrootutils
# ------------------------------------------------------------------------------------ #

from typing import Any, Tuple

import hydra
import pytorch_lightning as pl
from omegaconf import DictConfig
from pie_datasets import DatasetDict
from pytorch_ie.core import DocumentMetric
from pytorch_ie.metrics import *  # noqa: F403

from src import utils
from src.metrics import *  # noqa: F403

# Module-level logger, named after this module.
log = utils.get_pylogger(__name__)


@utils.task_wrapper
def evaluate_documents(cfg: DictConfig) -> Tuple[dict, dict]:
    """Evaluates serialized PIE documents.

    Instantiates the dataset and the metric from the config, lets the metric convert
    the dataset, optionally restricts the dataset to the configured splits and finally
    calls the metric on the remaining documents.

    This method is wrapped in optional @task_wrapper decorator which applies extra utilities
    before and after the call.

    Args:
        cfg (DictConfig): Configuration composed by Hydra. `dataset` and `metric` are
            required (both are instantiated via Hydra). `seed`, `logger` and `splits`
            are optional; `splits` may be a single split name or a list of names.

    Returns:
        Tuple[dict, dict]: Dict with metrics and dict with all instantiated objects.
    """

    # Set seed for random number generators in pytorch, numpy and python.random.
    # Compare against None explicitly: a seed of 0 is falsy but still a valid seed.
    seed = cfg.get("seed")
    if seed is not None:
        pl.seed_everything(seed, workers=True)

    # Init dataset
    log.info(f"Instantiating dataset <{cfg.dataset._target_}>")
    dataset: DatasetDict = hydra.utils.instantiate(cfg.dataset, _convert_="partial")

    # Init document metric
    log.info(f"Instantiating metric <{cfg.metric._target_}>")
    metric: DocumentMetric = hydra.utils.instantiate(cfg.metric, _convert_="partial")

    # auto-convert the dataset if the metric specifies a document type
    dataset = metric.convert_dataset(dataset)

    # Init lightning loggers
    loggers = utils.instantiate_dict_entries(cfg, "logger")

    object_dict = {
        "cfg": cfg,
        "dataset": dataset,
        "metric": metric,
        "logger": loggers,
    }

    if loggers:
        log.info("Logging hyperparameters!")
        # send hparams to all loggers
        for logger in loggers:
            logger.log_hyperparams(cfg)

    splits = cfg.get("splits", None)
    if splits is None:
        # no restriction configured: evaluate every split of the dataset
        documents = dataset
    else:
        # A bare string would otherwise be matched by substring (`k in "train_val"`),
        # so treat it as a single split name.
        if isinstance(splits, str):
            splits = [splits]
        selected = {name: split for name, split in dataset.items() if name in splits}
        # Unknown split names are skipped as before, but no longer silently.
        missing = [name for name in splits if name not in selected]
        if missing:
            log.warning(f"Requested splits not found in the dataset and skipped: {missing}")
        # rebuild a container of the same type as the (converted) dataset
        documents = type(dataset)(selected)

    metric_dict = metric(documents)

    return metric_dict, object_dict


@hydra.main(
    version_base="1.2",
    config_path=str(root / "configs"),
    config_name="evaluate_documents.yaml",
)
def main(cfg: DictConfig) -> Any:
    """Hydra entry point: run the document evaluation and return only the metrics.

    Args:
        cfg (DictConfig): Configuration composed by Hydra.
    Returns:
        Any: The metric result produced by `evaluate_documents`; the dict of
        instantiated objects is discarded.
    """
    metrics, _objects = evaluate_documents(cfg)
    return metrics


if __name__ == "__main__":
    # Both helpers come from src.utils and run before Hydra is invoked.
    # NOTE(review): presumably they expand file-based CLI arguments and set up
    # OmegaConf (e.g. resolvers) - confirm against src.utils.
    utils.replace_sys_args_with_values_from_files()
    utils.prepare_omegaconf()
    # `main` is decorated with @hydra.main, so it is called without arguments here.
    main()