https://github.com/ArneBinder/pie-document-level/pull/312
Browse files- configs/demo.yaml +73 -0
configs/demo.yaml
ADDED
@@ -0,0 +1,73 @@
# model details
default_model_name: "ArneBinder/sam-pointer-bart-base-v0.3.1"
default_model_revision: "d090d5385380692933e8a3bc466236e3a905492d"
# Whether to handle segmented entities in the document. If True, labeled_spans are converted
# to labeled_multi_spans and binary_relations with label "parts_of_same" are used to merge them.
# This requires the networkx package to be installed.
handle_parts_of_same: true
# Split the document text into sections that are processed separately.
default_split_regex: "\n\n\n+"

# retriever details
default_retriever_config_path: "configs/retriever/related_span_retriever_with_relations_from_other_docs.yaml"
default_min_similarity: 0.95

# data import details
default_arxiv_id: "1706.03762"
default_load_pie_dataset_kwargs:
  path: "pie/sciarg"
  name: "resolve_parts_of_same"
  split: "train"

# for better readability in the UI
render_mode_captions:
  displacy: "displaCy + highlighted arguments"
  pretty_table: "Pretty Table"
layer_caption_mapping:
  labeled_multi_spans: "adus"
  binary_relations: "relations"
  labeled_partitions: "partitions"
relation_name_mapping:
  supports_reversed: "supported by"
  contradicts_reversed: "contradicts"

default_render_mode: "displacy"
default_render_kwargs:
  entity_options:
    # we need to have the keys as uppercase because the spacy rendering function converts the labels to uppercase
    colors:
      OWN_CLAIM: "#009933"
      BACKGROUND_CLAIM: "#99ccff"
      DATA: "#993399"
    colors_hover:
      selected: "#ffa"
      # tail options for relationships
      tail:
        # green
        supports: "#9f9"
        # red
        contradicts: "#f99"
        # do not highlight
        parts_of_same: null
      head: null # "#faf"
      other: null

example_text: >
  Scholarly Argumentation Mining (SAM) has recently gained attention due to its
  potential to help scholars with the rapid growth of published scientific literature.
  It comprises two subtasks: argumentative discourse unit recognition (ADUR) and
  argumentative relation extraction (ARE), both of which are challenging since they
  require e.g. the integration of domain knowledge, the detection of implicit statements,
  and the disambiguation of argument structure.

  While previous work focused on dataset construction and baseline methods for
  specific document sections, such as abstract or results, full-text scholarly argumentation
  mining has seen little progress. In this work, we introduce a sequential pipeline model
  combining ADUR and ARE for full-text SAM, and provide a first analysis of the
  performance of pretrained language models (PLMs) on both subtasks.

  We establish a new SotA for ADUR on the Sci-Arg corpus, outperforming the previous best
  reported result by a large margin (+7% F1). We also present the first results for ARE, and
  thus for the full AM pipeline, on this benchmark dataset. Our detailed error analysis reveals
  that non-contiguous ADUs as well as the interpretation of discourse connectors pose major
  challenges and that data annotation needs to be more consistent.