Spaces:

ArneBinder
/

ScientificArgumentRecommender

Running

App Files Files Community

ArneBinder committed on Mar 1

Commit

f848af8

verified ·

1 Parent(s): ced4316

fix: upload https://github.com/ArneBinder/pie-document-level/pull/397

Browse files

Files changed (12) hide show

configs/argumentation_model/_joint.yaml +4 -0
configs/argumentation_model/_pipelined.yaml +17 -0
configs/argumentation_model/joint.yaml +10 -0
configs/argumentation_model/joint_hps.yaml +7 -0
configs/argumentation_model/pipelined.yaml +8 -0
configs/argumentation_model/pipelined_deprecated.yaml +9 -0
configs/argumentation_model/pipelined_hps.yaml +8 -0
configs/argumentation_model/pipelined_new.yaml +14 -0
configs/demo.yaml +17 -7
configs/pdf_fulltext_extractor/grobid_local.yaml +18 -0
configs/pdf_fulltext_extractor/none.yaml +0 -0
configs/retriever/related_span_retriever_with_relations_from_other_docs.yaml +3 -1

configs/argumentation_model/_joint.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+_target_: pytorch_ie.auto.AutoPipeline.from_pretrained
+pretrained_model_name_or_path: ???
+# this batch_size works well (fastest) on a single RTX2080Ti (11GB) (see https://github.com/ArneBinder/pie-document-level/issues/334#issuecomment-2613232344)
+batch_size: 1

configs/argumentation_model/_pipelined.yaml ADDED Viewed

	@@ -0,0 +1,17 @@

+_target_: src.pipeline.NerRePipeline
+ner_model_path: ???
+re_model_path: ???
+entity_layer: labeled_spans
+relation_layer: binary_relations
+# this works well on a single RTX2080Ti (11GB)
+ner_pipeline:
+  batch_size: 256
+re_pipeline:
+  batch_size: 64
+  # convert the RE model to half precision for mixed precision inference (speedup approx. 4x)
+  half_precision_model: true
+  taskmodule_kwargs:
+    # don't show statistics after encoding
+    collect_statistics: false
+# don't show pipeline steps
+verbose: false

configs/argumentation_model/joint.yaml ADDED Viewed

	@@ -0,0 +1,10 @@

+defaults:
+  - _joint
+# best model based on the validation set (see https://github.com/ArneBinder/pie-document-level/issues/334#issuecomment-2613232344 for details)
+# i.e. models from https://github.com/ArneBinder/pie-document-level/issues/334#issuecomment-2578422544, but with last checkpoint (instead of best validation checkpoint)
+#   model_name_or_path: models/dataset-sciarg/task-ner_re/v0.4/2025-01-09_01-50-53
+#   ckpt_path: logs/training/multiruns/dataset-sciarg/task-ner_re/v0.4/2025-01-09_01-50-52/2/checkpoints/last.ckpt
+#   w&b run (for the loaded checkpoint): [icy-glitter-5](https://wandb.ai/arne/dataset-sciarg-task-ner_re-v0.4-training/runs/it5toj6w)
+pretrained_model_name_or_path: "ArneBinder/sam-pointer-bart-base-v0.4"
+revision: "0445c69bafa31f8153aaeafc1767fad84919926a"

configs/argumentation_model/joint_hps.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+defaults:
+  - _joint
+# from: hparams_search for all datasets
+# see https://github.com/ArneBinder/pie-document-level/pull/381#issuecomment-2682711151
+# THESE ARE LOCAL PATHS, NOT HUGGINGFACE MODELS!
+pretrained_model_name_or_path: models/dataset-sciarg/task-ner_re/2025-02-23_05-16-45

configs/argumentation_model/pipelined.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+defaults:
+  - _pipelined
+# from: train pipeline models with bigger train set,
+# see https://github.com/ArneBinder/pie-document-level/issues/355#issuecomment-2612958658
+# THESE ARE LOCAL PATHS, NOT HUGGINGFACE MODELS!
+ner_model_path: models/dataset-sciarg/task-adus/v0.4/2025-01-20_05-50-00
+re_model_path: models/dataset-sciarg/task-relations/v0.4/2025-01-22_20-36-23

configs/argumentation_model/pipelined_deprecated.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+defaults:
+  - _pipelined
+# from: train pipeline models with bigger train set, but with strange choice of models,
+# see edit history of https://github.com/ArneBinder/pie-document-level/issues/355#issuecomment-2612958658
+# NOTE: these were originally in the pipelined.yaml
+# THESE ARE LOCAL PATHS, NOT HUGGINGFACE MODELS!
+ner_model_path: models/dataset-sciarg/task-adus/v0.4/2025-01-20_09-09-11
+re_model_path: models/dataset-sciarg/task-relations/v0.4/2025-01-22_12-44-51

configs/argumentation_model/pipelined_hps.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+defaults:
+  - _pipelined
+# from: hparams_search for all datasets,
+# see https://github.com/ArneBinder/pie-document-level/pull/381#issuecomment-2684865102
+# THESE ARE LOCAL PATHS, NOT HUGGINGFACE MODELS!
+ner_model_path: models/dataset-sciarg/task-adur/2025-02-26_07-14-59
+re_model_path: models/dataset-sciarg/task-are/2025-02-20_18-09-25

configs/argumentation_model/pipelined_new.yaml ADDED Viewed

	@@ -0,0 +1,14 @@

+defaults:
+  - _pipelined
+# from: Update scientific ARE experiment configs,
+# see https://github.com/ArneBinder/pie-document-level/pull/379#issuecomment-2651669398
+# i.e. the models are now on Hugging Face
+# ner_model_path: models/dataset-sciarg/task-adur/2025-02-09_23-08-37
+# re_model_path: models/dataset-sciarg/task-are/2025-02-10_19-24-52
+ner_model_path: ArneBinder/sam-adur-sciarg
+ner_pipeline:
+  revision: bcbef4e585a5f637009ff702661cf824abede6b0
+re_model_path: ArneBinder/sam-are-sciarg
+re_pipeline:
+  revision: 93024388330c58daf20963c2020e08f54553e74c

configs/demo.yaml CHANGED Viewed

@@ -1,8 +1,12 @@
-# model details
-# this is models/dataset-sciarg/task-ner_re/v0.3/2024-11-02_13-23-14
-# w&b run: [clear-grass-46](https://wandb.ai/arne/dataset-sciarg-task-ner_re-v0.3-training/runs/3cgqcalc)
-default_model_name: "ArneBinder/sam-pointer-bart-base-v0.3.1"
-default_model_revision: "d090d5385380692933e8a3bc466236e3a905492d"
 # Whether to handle segmented entities in the document. If True, labeled_spans are converted
 # to labeled_multi_spans and binary_relations with label "parts_of_same" are used to merge them.
 # This requires the networkx package to be installed.
@@ -10,9 +14,9 @@ handle_parts_of_same: true
 # Split the document text into sections that are processed separately.
 default_split_regex: "\n\n\n+"
-# retriever details
-default_retriever_config_path: "configs/retriever/related_span_retriever_with_relations_from_other_docs.yaml"
 default_min_similarity: 0.95
 # data import details
 default_arxiv_id: "1706.03762"
@@ -21,6 +25,12 @@ default_load_pie_dataset_kwargs:
   name: "resolve_parts_of_same"
   split: "train"
 # for better readability in the UI
 render_mode_captions:
   displacy: "displaCy + highlighted arguments"

+defaults:
+  - _self_
+  # default retriever, see subfolder retriever for more details
+  - retriever: related_span_retriever_with_relations_from_other_docs
+  # default argumentation model, see subfolder argumentation_model for more details
+  - argumentation_model: pipelined_new
+  # since this requires a running GROBID server, we disable it by default
+  - pdf_fulltext_extractor: none
 # Whether to handle segmented entities in the document. If True, labeled_spans are converted
 # to labeled_multi_spans and binary_relations with label "parts_of_same" are used to merge them.
 # This requires the networkx package to be installed.
 # Split the document text into sections that are processed separately.
 default_split_regex: "\n\n\n+"
+# retriever details (query parameters)
 default_min_similarity: 0.95
+default_top_k: 10
 # data import details
 default_arxiv_id: "1706.03762"
   name: "resolve_parts_of_same"
   split: "train"
+# set to the data directory of https://github.com/acl-org/acl-anthology
+#   to enable ACL venue PDF import (also requires a valid pdf_fulltext_extractor)
+# acl_anthology_data_dir: "../acl-anthology/data"
+# temporary directory to store downloaded PDFs
+acl_anthology_pdf_dir: "data/acl-anthology/pdf"
 # for better readability in the UI
 render_mode_captions:
   displacy: "displaCy + highlighted arguments"

configs/pdf_fulltext_extractor/grobid_local.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+# This requires a running GROBID server. To start the server via Docker, run:
+#   docker run --rm --init --ulimit core=0 -p 8070:8070 lfoppiano/grobid:0.8.0
+_target_: src.utils.pdf_utils.process_pdf.GrobidFulltextExtractor
+section_seperator: "\n\n\n"
+paragraph_seperator: "\n\n"
+grobid_config:
+  grobid_server: localhost
+  grobid_port: 8070
+  batch_size: 1000
+  sleep_time: 5
+  generateIDs: false
+  consolidate_header: false
+  consolidate_citations: false
+  include_raw_citations: true
+  include_raw_affiliations: false
+  max_workers: 2
+verbose: false

configs/pdf_fulltext_extractor/none.yaml ADDED Viewed

File without changes

configs/retriever/related_span_retriever_with_relations_from_other_docs.yaml CHANGED Viewed

@@ -1,8 +1,10 @@
 _target_: src.langchain_modules.DocumentAwareSpanRetrieverWithRelations
 reversed_relations_suffix: _reversed
 relation_labels:
   - supports_reversed
-  - contradicts_reversed
 retrieve_from_same_document: false
 retrieve_from_different_documents: true
 pie_document_type:

 _target_: src.langchain_modules.DocumentAwareSpanRetrieverWithRelations
+symmetric_relations:
+  - contradicts
 reversed_relations_suffix: _reversed
 relation_labels:
   - supports_reversed
+  - contradicts
 retrieve_from_same_document: false
 retrieve_from_different_documents: true
 pie_document_type: