Shuu12121's picture
Upload ModernBERT model
a5c0020 verified
|
raw
history blame
32.9 kB
metadata
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:2022217
  - loss:MultipleNegativesRankingLoss
base_model: Shuu12121/CodeModernBERT-Crow
widget:
  - source_sentence: |-
      Clone value to a new instance

      @private
      @param {*} val
      @returns {*}
    sentences:
      - |-
        function _copy(val) {
            const type = $type(val);

            if (type == 'object') {
                val = _extend({}, val, true);
            } else if (type == 'array') {
                val = val.slice(0);
            }

            return val;
        }
      - |-
        function (data) {
            data = data || {};

            this.category = data.hasOwnProperty('category') ? data.category : 'No category';
            this.id = data.hasOwnProperty('id') ? data.id : '';
            this.group = data.hasOwnProperty('group') ? data.group : '';
            this.lines = data.hasOwnProperty('lines') ? data.lines : 0;
            this.name = data.hasOwnProperty('name') ? data.name : '';
            this.options = data.hasOwnProperty('options') ? data.options : {};
            this.origin = data.hasOwnProperty('origin') ? data.origin : '';
            this.resources = data.hasOwnProperty('resources') ? data.resources : Object.assign({}, getDefaultOptions().resources);
            this.usage = data.hasOwnProperty('usage') ? data.usage : [];
            this.viewId = data.hasOwnProperty('viewId') ? data.viewId : '';
        }
      - |-
        public function relativeMove($model, $position)
            {
                $conditionAttributes = (array)$this->conditionAttributes;
                $owner = $this->owner;

                if (!empty($conditionAttributes)) {
                    $sameCondition = true;
                    foreach ($conditionAttributes as $attr) {
                        if ($owner->getAttribute($attr) != $model->getAttribute($attr)) {
                            $sameCondition = false;
                            break;
                        }
                    }
                    if (!$sameCondition) {
                        // move in other condition category
                        $this->moveToTop();
                        // update condition attribute
                        $condition = [];
                        foreach ($conditionAttributes as $attr) {
                            $condition[$attr] = $model->getAttribute($attr);
                        }
                        $condition[$this->sortAttribute] = $owner->find()->andWhere($this->getCondition())->count() - 1;
                        $owner->updateAttributes($condition);
                    }
                }
                // calculate pos change
                $currentPos = $owner->getAttribute($this->sortAttribute);
                $destinationPos = $model->getAttribute($this->sortAttribute);
                if ($position == 'after') {
                    $newPos = $destinationPos > $currentPos ? $destinationPos - 1 : $destinationPos;
                } else {
                    $newPos = $destinationPos > $currentPos ? $destinationPos : $destinationPos + 1;
                }
                $this->moveToPosition($newPos);
            }
  - source_sentence: >-
      /*

      Realize an asynchronous squeue command on slurm according a parameter (or
      not).

      Data are formated into a literal.

      @paramSqueue {string} optional. For example : ' -o "%j %i" ' // not
      implemented yet
    sentences:
      - |-
        function(paramSqueue) {
            if (!paramSqueue) paramSqueue = '';
            paramSqueue = ''; // to remove when it will be take into account in the implementation
            var emitter = new events.EventEmitter();
            var squeueRes_dict = {
                'id': [],
                'partition': [],
                'nameUUID': [],
                'status': []
            }

            // squeue command
            var exec_cmd = require('child_process').exec;
            exec_cmd(queueBinary + '  -o \"\%i \%P \%j \%t\" ' + paramSqueue, function(err, stdout, stderr) {
                if (err) {
                    emitter.emit('listError', err);
                    return;
                }
                var squeueRes_str = ('' + stdout).replace(/\"/g, ''); // squeue results
                squeueRes_str.split('\n')
                    .filter(function(jobArray, i) {
                        return jobArray.length > 0 && i > 0;
                    })
                    .map(function(jobLine, i) { // for each job
                        return test = jobLine.split(' ').filter(function(val) {
                            return val != ''; // keep values that are not empty
                        });
                    })
                    .map(function(jobArray, i) { // save each field in the corresponding array of dict
                        squeueRes_dict.id.push(jobArray[0]); // job ID gived by slurm
                        squeueRes_dict.partition.push(jobArray[1]); // gpu, cpu, etc.
                        squeueRes_dict.nameUUID.push(jobArray[2]); // unique job ID gived by Nslurm (uuid)
                        squeueRes_dict.status.push(jobArray[3]); // P, R, CF, CG, etc.
                    });
                emitter.emit('data', squeueRes_dict);
            });
            return emitter;
        }
      - |-
        function(node, state, leaving) {
                for (let i = 0; i < state.currentSegments.length; ++i) {
                    const segInternal = state.currentSegments[i].internal;

                    if (leaving) {
                        segInternal.exitNodes.push(node);
                    } else {
                        segInternal.nodes.push(node);
                    }
                }

                debug([
                    `${state.currentSegments.map(getId).join(",")})`,
                    `${node.type}${leaving ? ":exit" : ""}`
                ].join(" "));
            }
      - >-
        private void checkForGenerator(final Class<?> clazz, Field field,
        GeneratedValue generatedValue, String schemaName)

            {

                TableGenerator tableGenerator = field.getAnnotation(TableGenerator.class);
                SequenceGenerator sequenceGenerator = field.getAnnotation(SequenceGenerator.class);
                if (tableGenerator == null || !tableGenerator.name().equals(generatedValue.generator()))
                {
                    tableGenerator = clazz.getAnnotation(TableGenerator.class);
                }
                if (sequenceGenerator == null || !sequenceGenerator.name().equals(generatedValue.generator()))
                {
                    sequenceGenerator = clazz.getAnnotation(SequenceGenerator.class);
                }

                if ((tableGenerator == null && sequenceGenerator == null)
                        || (tableGenerator != null && !tableGenerator.name().equals(generatedValue.generator()))
                        || (sequenceGenerator != null && !sequenceGenerator.name().equals(generatedValue.generator())))
                {

                    throw new RuleValidationException("Unknown Id.generator: " + generatedValue.generator());

                }
                else if ((tableGenerator != null && !tableGenerator.schema().isEmpty() && !tableGenerator.schema().equals(
                        schemaName))
                        || (sequenceGenerator != null && !sequenceGenerator.schema().isEmpty() && !sequenceGenerator.schema()
                                .equals(schemaName)))
                {

                    throw new RuleValidationException("Generator " + generatedValue.generator() + " in entity : "
                            + clazz.getName() + " has different schema name ,it should be same as entity have");

                }

            }
  - source_sentence: |-
      @param      $param1
      @param null $param2
      @param null $param3

      @return array|int|mixed
      @throws DbException
    sentences:
      - "func (s *GetReservationUtilizationOutput) SetUtilizationsByTime(v []*UtilizationByTime) *GetReservationUtilizationOutput {\n\ts.UtilizationsByTime = v\n\treturn s\n}"
      - |-
        public static function ask($param1, $param2 = null, $param3 = null) {
                self::init();
                if(is_array($param1)) {
                    return self::smartSelect($param1, $param2, $param3);
                } else {
                    switch(substr($param1, 0, 1)) {
                        case '>':
                        case '/':
                            return self::smartQuery($param1, $param2);
                            break;
                        case '?':
                            return self::smartSelect(substr($param1, 1), $param2, $param3);
                            break;
                        default:
                            if(is_array($param3)) {
                                return self::smartUpdate($param1, $param2, $param3);
                            } else {
                                return self::smartInsert($param1, $param2);
                            }
                    }
                }
            }
      - |-
        void printStates() {
                   int     c;    // input "character"
                   int     n;    // state number

                   System.out.print("state |           i n p u t     s y m b o l s \n");
                   System.out.print("      | Acc  LA    Tag");
                   for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
                       RBBINode.printInt(c, 3);
                   }
                   System.out.print("\n");
                   System.out.print("      |---------------");
                   for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
                       System.out.print("---");
                   }
                   System.out.print("\n");

                   for (n=0; n<fDStates.size(); n++) {
                       RBBIStateDescriptor sd = fDStates.get(n);
                       RBBINode.printInt(n, 5);
                       System.out.print(" | ");

                       RBBINode.printInt(sd.fAccepting, 3);
                       RBBINode.printInt(sd.fLookAhead, 4);
                       RBBINode.printInt(sd.fTagsIdx, 6);
                       System.out.print(" ");
                       for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
                           RBBINode.printInt(sd.fDtran[c], 3);
                       }
                       System.out.print("\n");
                   }
                   System.out.print("\n\n");
               }
  - source_sentence: >-
      Performs a forward, allowing page-relative paths and setting all values

      compatible with &lt;ao:forward&gt; tag.


      @param  args  The arguments for the page, make unmodifiable and accessible
      as request-scope var "arg"


      @see #forward(java.lang.String, javax.servlet.RequestDispatcher,
      javax.servlet.http.HttpServletRequest,
      javax.servlet.http.HttpServletResponse, java.util.Map)
    sentences:
      - "public static void forward(\n\t\tServletContext servletContext,\n\t\tString page,\n\t\tHttpServletRequest request,\n\t\tHttpServletResponse response,\n\t\tMap<String,?> args\n\t) throws ServletException, IOException {\n\t\t// Resolve the dispatcher\n\t\tString contextRelativePath = ServletUtil.getAbsolutePath(getCurrentPagePath(request), page);\n\t\tRequestDispatcher dispatcher = servletContext.getRequestDispatcher(contextRelativePath);\n\t\tif(dispatcher==null) throw new LocalizedServletException(accessor, \"Dispatcher.dispatcherNotFound\", contextRelativePath);\n\t\tforward(contextRelativePath, dispatcher, request, response, args);\n\t}"
      - "public function getConnection($name = ''): QueryBuilderInterface\n\t{\n\t\t// If the parameter is a string, use it as an array index\n\t\tif (is_scalar($name) && isset($this->connections[$name]))\n\t\t{\n\t\t\treturn $this->connections[$name];\n\t\t}\n\t\telse if (empty($name) && ! empty($this->connections)) // Otherwise, return the last one\n\t\t{\n\t\t\treturn end($this->connections);\n\t\t}\n\n\t\t// You should actually connect before trying to get a connection...\n\t\tthrow new InvalidArgumentException('The specified connection does not exist');\n\t}"
      - "func (c *Context) Untrack(class, id string) error {\n\tfullID := payload.BuildID(class, id)\n\tlogger.Tracef(\"Calling untrack on payload context %q\", fullID)\n\n\tres, err := c.api.Untrack(fullID)\n\tif err != nil {\n\t\treturn errors.Trace(err)\n\t}\n\t// TODO(ericsnow) We should not ignore a 0-len result.\n\tif len(res) > 0 && res[0].Error != nil {\n\t\treturn errors.Trace(res[0].Error)\n\t}\n\tdelete(c.payloads, id)\n\n\treturn nil\n}"
  - source_sentence: /* PRIVATE
    sentences:
      - |-
        void activate(ProtocolVersion helloVersion) throws IOException {
                if (activeProtocols == null) {
                    activeProtocols = getActiveProtocols();
                }

                if (activeProtocols.collection().isEmpty() ||
                        activeProtocols.max.v == ProtocolVersion.NONE.v) {
                    throw new SSLHandshakeException(
                            "No appropriate protocol (protocol is disabled or " +
                            "cipher suites are inappropriate)");
                }

                if (activeCipherSuites == null) {
                    activeCipherSuites = getActiveCipherSuites();
                }

                if (activeCipherSuites.collection().isEmpty()) {
                    throw new SSLHandshakeException("No appropriate cipher suite");
                }

                // temporary protocol version until the actual protocol version
                // is negotiated in the Hello exchange. This affects the record
                // version we sent with the ClientHello.
                if (!isInitialHandshake) {
                    protocolVersion = activeProtocolVersion;
                } else {
                    protocolVersion = activeProtocols.max;
                }

                if (helloVersion == null || helloVersion.v == ProtocolVersion.NONE.v) {
                    helloVersion = activeProtocols.helloVersion;
                }

                // We accumulate digests of the handshake messages so that
                // we can read/write CertificateVerify and Finished messages,
                // getting assurance against some particular active attacks.
                Set<String> localSupportedHashAlgorithms =
                    SignatureAndHashAlgorithm.getHashAlgorithmNames(
                        getLocalSupportedSignAlgs());
                handshakeHash = new HandshakeHash(!isClient, needCertVerify,
                    localSupportedHashAlgorithms);

                // Generate handshake input/output stream.
                input = new HandshakeInStream(handshakeHash);
                if (conn != null) {
                    output = new HandshakeOutStream(protocolVersion, helloVersion,
                                                handshakeHash, conn);
                    conn.getAppInputStream().r.setHandshakeHash(handshakeHash);
                    conn.getAppInputStream().r.setHelloVersion(helloVersion);
                    conn.getAppOutputStream().r.setHelloVersion(helloVersion);
                } else {
                    output = new HandshakeOutStream(protocolVersion, helloVersion,
                                                handshakeHash, engine);
                    engine.inputRecord.setHandshakeHash(handshakeHash);
                    engine.inputRecord.setHelloVersion(helloVersion);
                    engine.outputRecord.setHelloVersion(helloVersion);
                }

                // move state to activated
                state = -1;
            }
      - |-
        function _error(coreIndex, cores) {
          var errMsg =
            '[cpu-stats] Error: Core "' + coreIndex + '" not found, use one of ' +
            '[0, ' + (cores - 1) + '], ' +
            'since your system has a total of ' + cores + ' cores.';
          console.log(errMsg);
        }
      - |-
        function _drawLine(v0, v1, color) {
                var p = new Primitive();

                p.vertices = [v0, v1];
                p.color = toColor(color);

                renderer.addPrimitive(p);
            }
pipeline_tag: sentence-similarity
library_name: sentence-transformers

SentenceTransformer based on Shuu12121/CodeModernBERT-Crow

This is a sentence-transformers model fine-tuned from Shuu12121/CodeModernBERT-Crow. It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: Shuu12121/CodeModernBERT-Crow
  • Maximum Sequence Length: 1024 tokens
  • Output Dimensionality: 768 dimensions
  • Similarity Function: Cosine Similarity

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: ModernBertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub (the ID below is a placeholder: replace it with this model's Hub repository ID)
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    '/* PRIVATE',
    'function _error(coreIndex, cores) {\n  var errMsg =\n    \'[cpu-stats] Error: Core "\' + coreIndex + \'" not found, use one of \' +\n    \'[0, \' + (cores - 1) + \'], \' +\n    \'since your system has a total of \' + cores + \' cores.\';\n  console.log(errMsg);\n}',
    'function _drawLine(v0, v1, color) {\n        var p = new Primitive();\n\n        p.vertices = [v0, v1];\n        p.color = toColor(color);\n\n        renderer.addPrimitive(p);\n    }',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Training Details

Training Dataset

Unnamed Dataset

  • Size: 2,022,217 training samples
  • Columns: sentence_0, sentence_1, and label
  • Approximate statistics based on the first 1000 samples:
    |         | sentence_0 | sentence_1 | label |
    |:--------|:-----------|:-----------|:------|
    | type    | string     | string     | float |
    | details | min: 3 tokens; mean: 48.56 tokens; max: 1024 tokens | min: 30 tokens; mean: 171.79 tokens; max: 1024 tokens | min: 1.0; mean: 1.0; max: 1.0 |
  • Samples:
    sentence_0 sentence_1 label
    // GetNodeID returns the NodeID field if it's non-nil, zero value otherwise. func (a *App) GetNodeID() string {
    if a == nil
    // _NET_WM_STRUT_PARTIAL set func WmStrutPartialSet(xu *xgbutil.XUtil, win xproto.Window,
    struts *WmStrutPartial) error {

    rawStruts := make([]uint, 12)
    rawStruts[0] = struts.Left
    rawStruts[1] = struts.Right
    rawStruts[2] = struts.Top
    rawStruts[3] = struts.Bottom
    rawStruts[4] = struts.LeftStartY
    rawStruts[5] = struts.LeftEndY
    rawStruts[6] = struts.RightStartY
    rawStruts[7] = struts.RightEndY
    rawStruts[8] = struts.TopStartX
    rawStruts[9] = struts.TopEndX
    rawStruts[10] = struts.BottomStartX
    rawStruts[11] = struts.BottomEndX

    return xprop.ChangeProp32(xu, win, "_NET_WM_STRUT_PARTIAL", "CARDINAL",
    rawStruts...)
    }
    1.0
    //GetQyAccessToken 获取access_token func (ctx *Context) GetQyAccessToken() (accessToken string, err error) {
    ctx.accessTokenLock.Lock()
    defer ctx.accessTokenLock.Unlock()

    accessTokenCacheKey := fmt.Sprintf("qy_access_token_%s", ctx.AppID)
    val := ctx.Cache.Get(accessTokenCacheKey)
    if val != nil {
    accessToken = val.(string)
    return
    }

    //从微信服务器获取
    var resQyAccessToken ResQyAccessToken
    resQyAccessToken, err = ctx.GetQyAccessTokenFromServer()
    if err != nil {
    return
    }

    accessToken = resQyAccessToken.AccessToken
    return
    }
    1.0
  • Loss: MultipleNegativesRankingLoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "cos_sim"
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • per_device_train_batch_size: 256
  • per_device_eval_batch_size: 256
  • num_train_epochs: 5
  • fp16: True
  • multi_dataset_batch_sampler: round_robin

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: no
  • prediction_loss_only: True
  • per_device_train_batch_size: 256
  • per_device_eval_batch_size: 256
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1
  • num_train_epochs: 5
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.0
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • tp_size: 0
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: None
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • include_for_metrics: []
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • use_liger_kernel: False
  • eval_use_gather_object: False
  • average_tokens_across_devices: False
  • prompts: None
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: round_robin

Training Logs

Epoch Step Training Loss
0.0633 500 0.8015
0.1266 1000 0.1036
0.1899 1500 0.0973
0.2532 2000 0.0921
0.3165 2500 0.0876
0.3797 3000 0.0861
0.4430 3500 0.0843
0.5063 4000 0.0841
0.5696 4500 0.0788
0.6329 5000 0.0794
0.6962 5500 0.0782
0.7595 6000 0.077
0.8228 6500 0.0749
0.8861 7000 0.0749
0.9494 7500 0.0724
1.0127 8000 0.0658
1.0759 8500 0.0385
1.1392 9000 0.0381
1.2025 9500 0.0383
1.2658 10000 0.0381
1.3291 10500 0.0382
1.3924 11000 0.0384
1.4557 11500 0.0384
1.5190 12000 0.039
1.5823 12500 0.0391
1.6456 13000 0.0401
1.7089 13500 0.0383
1.7722 14000 0.0392
1.8354 14500 0.0371
1.8987 15000 0.0387
1.9620 15500 0.0385
2.0253 16000 0.0298
2.0886 16500 0.0171
2.1519 17000 0.0174
2.2152 17500 0.0171
2.2785 18000 0.0169
2.3418 18500 0.0174
2.4051 19000 0.0177
2.4684 19500 0.0175
2.5316 20000 0.0171
2.5949 20500 0.017
2.6582 21000 0.0172
2.7215 21500 0.0178
2.7848 22000 0.0167
2.8481 22500 0.0176
2.9114 23000 0.0175
2.9747 23500 0.0178
3.0380 24000 0.0129
3.1013 24500 0.0099
3.1646 25000 0.0097
3.2278 25500 0.0097
3.2911 26000 0.0101
3.3544 26500 0.0098
3.4177 27000 0.0099
3.4810 27500 0.0096
3.5443 28000 0.0095
3.6076 28500 0.0094
3.6709 29000 0.0097
3.7342 29500 0.01
3.7975 30000 0.0096
3.8608 30500 0.0098
3.9241 31000 0.0095
3.9873 31500 0.0094
4.0506 32000 0.0079
4.1139 32500 0.0074
4.1772 33000 0.0072
4.2405 33500 0.0073
4.3038 34000 0.0071
4.3671 34500 0.0073
4.4304 35000 0.007
4.4937 35500 0.0072
4.5570 36000 0.0071
4.6203 36500 0.0071
4.6835 37000 0.0072
4.7468 37500 0.0072
4.8101 38000 0.0069
4.8734 38500 0.007
4.9367 39000 0.007
5.0 39500 0.007

Framework Versions

  • Python: 3.11.11
  • Sentence Transformers: 3.4.1
  • Transformers: 4.51.3
  • PyTorch: 2.5.1+cu124
  • Accelerate: 1.3.0
  • Datasets: 3.5.0
  • Tokenizers: 0.21.0

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}