Add new CrossEncoder model
- .gitattributes +1 -0
- README.md +478 -0
- config.json +53 -0
- model.safetensors +3 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +55 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,478 @@
---
language:
- en
tags:
- sentence-transformers
- cross-encoder
- generated_from_trainer
- dataset_size:1990000
- loss:BinaryCrossEntropyLoss
base_model: Alibaba-NLP/gte-multilingual-base
datasets:
- sentence-transformers/msmarco
pipeline_tag: text-ranking
library_name: sentence-transformers
metrics:
- map
- mrr@10
- ndcg@10
model-index:
- name: CrossEncoder based on Alibaba-NLP/gte-multilingual-base
  results:
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoMSMARCO R100
      type: NanoMSMARCO_R100
    metrics:
    - type: map
      value: 0.6138
      name: Map
    - type: mrr@10
      value: 0.6029
      name: Mrr@10
    - type: ndcg@10
      value: 0.6561
      name: Ndcg@10
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoNFCorpus R100
      type: NanoNFCorpus_R100
    metrics:
    - type: map
      value: 0.3423
      name: Map
    - type: mrr@10
      value: 0.5771
      name: Mrr@10
    - type: ndcg@10
      value: 0.3777
      name: Ndcg@10
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoNQ R100
      type: NanoNQ_R100
    metrics:
    - type: map
      value: 0.5809
      name: Map
    - type: mrr@10
      value: 0.5987
      name: Mrr@10
    - type: ndcg@10
      value: 0.6548
      name: Ndcg@10
  - task:
      type: cross-encoder-nano-beir
      name: Cross Encoder Nano BEIR
    dataset:
      name: NanoBEIR R100 mean
      type: NanoBEIR_R100_mean
    metrics:
    - type: map
      value: 0.5123
      name: Map
    - type: mrr@10
      value: 0.5929
      name: Mrr@10
    - type: ndcg@10
      value: 0.5629
      name: Ndcg@10
---
# CrossEncoder based on Alibaba-NLP/gte-multilingual-base

This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) on the [msmarco](https://huggingface.co/datasets/sentence-transformers/msmarco) dataset using the [sentence-transformers](https://www.SBERT.net) library. It computes scores for pairs of texts, which can be used for text reranking and semantic search.

## Model Details

### Model Description
- **Model Type:** Cross Encoder
- **Base model:** [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) <!-- at revision 9fdd4ee8bba0e2808a34e0e739576f6740d2b225 -->
- **Maximum Sequence Length:** 8192 tokens
- **Number of Output Labels:** 1 label
- **Training Dataset:**
    - [msmarco](https://huggingface.co/datasets/sentence-transformers/msmarco)
- **Language:** en
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Documentation:** [Cross Encoder Documentation](https://www.sbert.net/docs/cross_encoder/usage/usage.html)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Cross Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=cross-encoder)

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import CrossEncoder

# Download from the 🤗 Hub
model = CrossEncoder("skfrost19/reranker-gte-multilingual-base-msmarco-bce")
# Get scores for pairs of texts
pairs = [
    ['what symptoms might a patient with a tmd have', 'TMD sufferers have a long list of symptoms, including chronic pain (https://youtu.be/SvMaJb8o2RI), many of which are in common with Parkinson’s disease (PD) symptoms.'],
    ['what is a thermal protector', 'The word hero comes from the Greek ἥρως (hḗrōs), hero, warrior, particularly one such as Heracles with divine ancestry or later given divine honors. literally protector or defender.'],
    ['how many copies of call of duty wwii sold', 'Call of Duty 3. Call of Duty 3 is a World War II first-person shooter and the third installment in the Call of Duty video game series. Released on November 7, 2006, the game was developed by Treyarch, and was the first major installment in the Call of Duty series not to be developed by Infinity Ward. It was also the first not to be released on the PC platform. It was released on the PlayStation 2, PlayStation 3, Wii, Xbox, and Xbox 360.'],
    ['what is the desired temperature for the fresh food compartment in a refrigerator', 'A refrigerator maintains a temperature a few degrees above the freezing point of water. Optimum temperature range for perishable food storage is 3 to 5 °C (37 to 41 °F).emperature settings for refrigerator and freezer compartments are often given arbitrary numbers by manufacturers (for example, 1 through 9, warmest to coldest), but generally 3 to 5 °C (37 to 41 °F) is ideal for the refrigerator compartment and −18 °C (0 °F) for the freezer.'],
    ['what is gsm alarm system', 'I’m sure you would have these questions in your mind when you heard GSM alarm system at the first time. GSM alarm system is an alarm system that operating through GSM (global system for mobile communications) network; not requiring a telephone line.urthermore, in the case of burglar entering the premises and cutting the telephone line, the GSM alarm would not be affected and still work as it does not require the use of a fixed phone line. So this security alarm is ideal for the place where no fixed phone line or hard to get one.'],
]
scores = model.predict(pairs)
print(scores.shape)
# (5,)

# Or rank different texts based on similarity to a single text
ranks = model.rank(
    'what symptoms might a patient with a tmd have',
    [
        'TMD sufferers have a long list of symptoms, including chronic pain (https://youtu.be/SvMaJb8o2RI), many of which are in common with Parkinson’s disease (PD) symptoms.',
        'The word hero comes from the Greek ἥρως (hḗrōs), hero, warrior, particularly one such as Heracles with divine ancestry or later given divine honors. literally protector or defender.',
        'Call of Duty 3. Call of Duty 3 is a World War II first-person shooter and the third installment in the Call of Duty video game series. Released on November 7, 2006, the game was developed by Treyarch, and was the first major installment in the Call of Duty series not to be developed by Infinity Ward. It was also the first not to be released on the PC platform. It was released on the PlayStation 2, PlayStation 3, Wii, Xbox, and Xbox 360.',
        'A refrigerator maintains a temperature a few degrees above the freezing point of water. Optimum temperature range for perishable food storage is 3 to 5 °C (37 to 41 °F).emperature settings for refrigerator and freezer compartments are often given arbitrary numbers by manufacturers (for example, 1 through 9, warmest to coldest), but generally 3 to 5 °C (37 to 41 °F) is ideal for the refrigerator compartment and −18 °C (0 °F) for the freezer.',
        'I’m sure you would have these questions in your mind when you heard GSM alarm system at the first time. GSM alarm system is an alarm system that operating through GSM (global system for mobile communications) network; not requiring a telephone line.urthermore, in the case of burglar entering the premises and cutting the telephone line, the GSM alarm would not be affected and still work as it does not require the use of a fixed phone line. So this security alarm is ideal for the place where no fixed phone line or hard to get one.',
    ]
)
# [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Cross Encoder Reranking

* Datasets: `NanoMSMARCO_R100`, `NanoNFCorpus_R100` and `NanoNQ_R100`
* Evaluated with [<code>CrossEncoderRerankingEvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderRerankingEvaluator) with these parameters:
  ```json
  {
      "at_k": 10,
      "always_rerank_positives": true
  }
  ```

| Metric      | NanoMSMARCO_R100     | NanoNFCorpus_R100    | NanoNQ_R100          |
|:------------|:---------------------|:---------------------|:---------------------|
| map         | 0.6138 (+0.1242)     | 0.3423 (+0.0813)     | 0.5809 (+0.1613)     |
| mrr@10      | 0.6029 (+0.1254)     | 0.5771 (+0.0772)     | 0.5987 (+0.1720)     |
| **ndcg@10** | **0.6561 (+0.1157)** | **0.3777 (+0.0527)** | **0.6548 (+0.1541)** |

#### Cross Encoder Nano BEIR

* Dataset: `NanoBEIR_R100_mean`
* Evaluated with [<code>CrossEncoderNanoBEIREvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderNanoBEIREvaluator) with these parameters:
  ```json
  {
      "dataset_names": [
          "msmarco",
          "nfcorpus",
          "nq"
      ],
      "rerank_k": 100,
      "at_k": 10,
      "always_rerank_positives": true
  }
  ```

| Metric      | Value                |
|:------------|:---------------------|
| map         | 0.5123 (+0.1223)     |
| mrr@10      | 0.5929 (+0.1249)     |
| **ndcg@10** | **0.5629 (+0.1075)** |
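The NanoBEIR numbers above can be reproduced with the same evaluator settings. A minimal sketch, assuming the sentence-transformers v4 cross-encoder evaluation API linked above (only the parameter values come from this card):

```python
from sentence_transformers import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CrossEncoderNanoBEIREvaluator

model = CrossEncoder("skfrost19/reranker-gte-multilingual-base-msmarco-bce")

# Same settings as reported above: rerank the top-100 candidates, report metrics at k=10
evaluator = CrossEncoderNanoBEIREvaluator(
    dataset_names=["msmarco", "nfcorpus", "nq"],
    rerank_k=100,
    at_k=10,
    always_rerank_positives=True,
)
results = evaluator(model)  # dict of metrics, including the mean NDCG@10 over the three datasets
print(results)
```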
<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### msmarco

* Dataset: [msmarco](https://huggingface.co/datasets/sentence-transformers/msmarco) at [9e329ed](https://huggingface.co/datasets/sentence-transformers/msmarco/tree/9e329ed2e649c9d37b0d91dd6b764ff6fe671d83)
* Size: 1,990,000 training samples
* Columns: <code>query</code>, <code>passage</code>, and <code>score</code>
* Approximate statistics based on the first 1000 samples:
  |         | query | passage | score |
  |:--------|:------|:--------|:------|
  | type    | string | string | float |
  | details | <ul><li>min: 11 characters</li><li>mean: 34.61 characters</li><li>max: 124 characters</li></ul> | <ul><li>min: 82 characters</li><li>mean: 357.43 characters</li><li>max: 1034 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.49</li><li>max: 1.0</li></ul> |
* Samples:
  | query | passage | score |
  |:------|:--------|:------|
  | <code>what causes your tailbone to hurt</code> | <code>A coccyx injury results in pain and discomfort in the tailbone area (the condition is called coccydynia). These injuries may result in a bruise, dislocation, or fracture (break) of the coccyx. Although they may be slow to heal, the majority of coccyx injuries can be managed with cautious treatment.ost tailbone injuries are caused by trauma to the coccyx area. 1 A fall onto the tailbone in the seated position, usually against a hard surface, is the most common cause of coccyx injuries. 2 A direct blow to the tailbone, such as those that occur during contact sports, can injure the coccyx.</code> | <code>1.0</code> |
  | <code>what muscles do trunk lateral flexion</code> | <code>It’s the same with the External Obliques, but unlike the External Obliques, they are not visible when fully developed. Action: 1 Supports abdominal wall, assists forced respiration, aids raising intra-abdominal pressure and, with muscles of other side, abducts and rotates trunk. 2 Contraction of one side alone laterally bends the trunk to that side and rotates the trunk to the other side.</code> | <code>0.0</code> |
  | <code>brake horsepower definition</code> | <code>When the brake lights will not come on, the first thing to check is the third-brake light. If it too is not working, the brake-light switch, a bad fuse or an unplugged harness is likely.ull up on the brake pedal and if the lights go out, switch mis-alignment or pedal position error is the likely cause. The final possibility is a wire shorted to power. Unplug the brake-light switch and if the lights stay on, a short circuit is the case.</code> | <code>0.0</code> |
* Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
  ```json
  {
      "activation_fn": "torch.nn.modules.linear.Identity",
      "pos_weight": null
  }
  ```
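For reference, a minimal sketch of how this loss could be instantiated for further fine-tuning, assuming the v4 `CrossEncoder` constructor accepts `num_labels` and `trust_remote_code` (needed because the base model ships custom modeling code):

```python
from sentence_transformers import CrossEncoder
from sentence_transformers.cross_encoder.losses import BinaryCrossEntropyLoss

# One relevance logit per (query, passage) pair
model = CrossEncoder("Alibaba-NLP/gte-multilingual-base", num_labels=1, trust_remote_code=True)

# Defaults correspond to the parameters listed above: Identity activation and no pos_weight,
# i.e. plain BCE-with-logits against the float "score" column.
loss = BinaryCrossEntropyLoss(model)
```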
### Evaluation Dataset

#### msmarco

* Dataset: [msmarco](https://huggingface.co/datasets/sentence-transformers/msmarco) at [9e329ed](https://huggingface.co/datasets/sentence-transformers/msmarco/tree/9e329ed2e649c9d37b0d91dd6b764ff6fe671d83)
* Size: 10,000 evaluation samples
* Columns: <code>query</code>, <code>passage</code>, and <code>score</code>
* Approximate statistics based on the first 1000 samples:
  |         | query | passage | score |
  |:--------|:------|:--------|:------|
  | type    | string | string | float |
  | details | <ul><li>min: 9 characters</li><li>mean: 33.72 characters</li><li>max: 193 characters</li></ul> | <ul><li>min: 55 characters</li><li>mean: 353.35 characters</li><li>max: 895 characters</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.5</li><li>max: 1.0</li></ul> |
* Samples:
  | query | passage | score |
  |:------|:--------|:------|
  | <code>what symptoms might a patient with a tmd have</code> | <code>TMD sufferers have a long list of symptoms, including chronic pain (https://youtu.be/SvMaJb8o2RI), many of which are in common with Parkinson’s disease (PD) symptoms.</code> | <code>1.0</code> |
  | <code>what is a thermal protector</code> | <code>The word hero comes from the Greek ἥρως (hḗrōs), hero, warrior, particularly one such as Heracles with divine ancestry or later given divine honors. literally protector or defender.</code> | <code>0.0</code> |
  | <code>how many copies of call of duty wwii sold</code> | <code>Call of Duty 3. Call of Duty 3 is a World War II first-person shooter and the third installment in the Call of Duty video game series. Released on November 7, 2006, the game was developed by Treyarch, and was the first major installment in the Call of Duty series not to be developed by Infinity Ward. It was also the first not to be released on the PC platform. It was released on the PlayStation 2, PlayStation 3, Wii, Xbox, and Xbox 360.</code> | <code>0.0</code> |
* Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
  ```json
  {
      "activation_fn": "torch.nn.modules.linear.Identity",
      "pos_weight": null
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 128
- `per_device_eval_batch_size`: 128
- `learning_rate`: 2e-05
- `num_train_epochs`: 1
- `warmup_ratio`: 0.1
- `seed`: 12
- `bf16`: True
- `dataloader_num_workers`: 4
- `load_best_model_at_end`: True
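These non-default values map directly onto the Hugging Face-style training arguments used by the sentence-transformers v4 cross-encoder trainer. A minimal, illustrative sketch: the tiny in-memory dataset below stands in for the real msmarco (query, passage, score) pairs, and the class names assume the v4 `CrossEncoderTrainer` API rather than quoting this model's exact training script:

```python
from datasets import Dataset
from sentence_transformers import CrossEncoder
from sentence_transformers.cross_encoder import CrossEncoderTrainer, CrossEncoderTrainingArguments
from sentence_transformers.cross_encoder.losses import BinaryCrossEntropyLoss

model = CrossEncoder("Alibaba-NLP/gte-multilingual-base", num_labels=1, trust_remote_code=True)
loss = BinaryCrossEntropyLoss(model)

# Toy stand-in for the (query, passage, score) columns described above
train_dataset = Dataset.from_dict({
    "query": ["what causes your tailbone to hurt", "brake horsepower definition"],
    "passage": ["A coccyx injury results in pain and discomfort ...", "When the brake lights will not come on ..."],
    "score": [1.0, 0.0],
})

args = CrossEncoderTrainingArguments(
    output_dir="reranker-gte-multilingual-base-msmarco-bce",
    num_train_epochs=1,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    learning_rate=2e-5,
    warmup_ratio=0.1,
    seed=12,
    bf16=True,              # assumes an Ampere-or-newer GPU, as in the original run
    dataloader_num_workers=4,
)

CrossEncoderTrainer(model=model, args=args, train_dataset=train_dataset, loss=loss).train()
```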
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 128
- `per_device_eval_batch_size`: 128
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 2e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 12
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: True
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 4
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `tp_size`: 0
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
| Epoch      | Step      | Training Loss | Validation Loss | NanoMSMARCO_R100_ndcg@10 | NanoNFCorpus_R100_ndcg@10 | NanoNQ_R100_ndcg@10  | NanoBEIR_R100_mean_ndcg@10 |
|:----------:|:---------:|:-------------:|:---------------:|:------------------------:|:-------------------------:|:--------------------:|:--------------------------:|
| -1         | -1        | -             | -               | 0.0063 (-0.5341)         | 0.2009 (-0.1241)          | 0.0649 (-0.4357)     | 0.0907 (-0.3646)           |
| 0.0001     | 1         | 0.702         | -               | -                        | -                         | -                    | -                          |
| 0.2573     | 4000      | 0.2125        | -               | -                        | -                         | -                    | -                          |
| 0.5146     | 8000      | 0.1655        | -               | -                        | -                         | -                    | -                          |
| **0.6432** | **10000** | **-**         | **0.1367**      | **0.6561 (+0.1157)**     | **0.3777 (+0.0527)**      | **0.6548 (+0.1541)** | **0.5629 (+0.1075)**       |
| 0.7719     | 12000     | 0.1411        | -               | -                        | -                         | -                    | -                          |
| -1         | -1        | -             | -               | 0.6561 (+0.1157)         | 0.3777 (+0.0527)          | 0.6548 (+0.1541)     | 0.5629 (+0.1075)           |

* The bold row denotes the saved checkpoint.

### Framework Versions
- Python: 3.11.5
- Sentence Transformers: 4.0.1
- Transformers: 4.50.3
- PyTorch: 2.6.0+cu124
- Accelerate: 1.6.0
- Datasets: 3.5.0
- Tokenizers: 0.21.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,53 @@
{
  "architectures": [
    "NewForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "auto_map": {
    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
  },
  "classifier_dropout": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "layer_norm_type": "layer_norm",
  "logn_attention_clip1": false,
  "logn_attention_scale": false,
  "max_position_embeddings": 8192,
  "model_type": "new",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pack_qkv": true,
  "pad_token_id": 1,
  "position_embedding_type": "rope",
  "rope_scaling": {
    "factor": 8.0,
    "type": "ntk"
  },
  "rope_theta": 20000,
  "sentence_transformers": {
    "activation_fn": "torch.nn.modules.activation.Sigmoid",
    "version": "4.0.1"
  },
  "torch_dtype": "float32",
  "transformers_version": "4.50.3",
  "type_vocab_size": 1,
  "unpad_inputs": false,
  "use_memory_efficient_attention": false,
  "vocab_size": 250048
}
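Note that `auto_map` resolves to custom modeling code hosted in the Alibaba-NLP/new-impl repository, and the exported `sentence_transformers.activation_fn` is `Sigmoid`, so `CrossEncoder.predict` returns scores in [0, 1]. A hedged sketch of loading the checkpoint with plain `transformers`, which therefore needs `trust_remote_code=True` (model id taken from the usage section above):

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "skfrost19/reranker-gte-multilingual-base-msmarco-bce"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code is required because auto_map points at Alibaba-NLP/new-impl
model = AutoModelForSequenceClassification.from_pretrained(model_id, trust_remote_code=True).eval()

features = tokenizer(
    ["what symptoms might a patient with a tmd have"],
    ["TMD sufferers have a long list of symptoms, including chronic pain ..."],
    padding=True, truncation=True, return_tensors="pt",
)
with torch.no_grad():
    logits = model(**features).logits   # shape (1, 1): one relevance logit per pair
    score = torch.sigmoid(logits)       # mirrors the Sigmoid activation stored in config.json
print(score)
```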
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:903dce1291641269f9fea0db34549fbf24b8b0ad63731143fbcceaeadcf83cd7
size 1223854204
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa7a6ad87a7ce8fe196787355f6af7d03aee94d19c54a5eb1392ed18c8ef451a
size 17082988
tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "model_max_length": 8192,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "XLMRobertaTokenizerFast",
  "unk_token": "<unk>"
}
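A quick, hedged sanity check that the exported tokenizer matches the configuration above (XLM-RoBERTa fast tokenizer with an 8192-token limit; model id taken from the usage section):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("skfrost19/reranker-gte-multilingual-base-msmarco-bce")
print(type(tokenizer).__name__)    # XLMRobertaTokenizerFast, per tokenizer_config.json
print(tokenizer.model_max_length)  # 8192
print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token)  # <s> </s> <pad>
```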