diff --git a/notebooks/openvino/sentence_transformer_quantization.ipynb b/notebooks/openvino/sentence_transformer_quantization.ipynb
index db5d20297..2de76c6e4 100644
--- a/notebooks/openvino/sentence_transformer_quantization.ipynb
+++ b/notebooks/openvino/sentence_transformer_quantization.ipynb
@@ -177,9 +177,11 @@
     "\n",
     "quantizer = OVQuantizer.from_pretrained(model)\n",
     "\n",
+    "\n",
     "def preprocess_function(examples, tokenizer):\n",
     "    return tokenizer(examples[\"sentence\"], padding=\"max_length\", max_length=384, truncation=True)\n",
     "\n",
+    "\n",
     "calibration_dataset = quantizer.get_calibration_dataset(\n",
     "    \"glue\",\n",
     "    dataset_config_name=\"sst2\",\n",
@@ -194,13 +196,6 @@
     "tokenizer.save_pretrained(int8_ptq_model_path)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -216,11 +211,12 @@
    "source": [
     "from transformers import Pipeline\n",
     "import torch.nn.functional as F\n",
-    "import torch \n",
+    "import torch\n",
+    "\n",
     "\n",
     "# copied from the model card\n",
     "def mean_pooling(model_output, attention_mask):\n",
-    "    token_embeddings = model_output[0] #First element of model_output contains all token embeddings\n",
+    "    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings\n",
     "    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()\n",
     "    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)\n",
     "\n",
@@ -230,7 +226,7 @@
     "        # we don\"t have any hyperameters to sanitize\n",
     "        preprocess_kwargs = {}\n",
     "        return preprocess_kwargs, {}, {}\n",
-    "    \n",
+    "\n",
     "    def preprocess(self, inputs):\n",
     "        encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, return_tensors=\"pt\")\n",
     "        return encoded_inputs\n",
@@ -283,7 +279,7 @@
     "from datasets import load_dataset\n",
     "from evaluate import load\n",
     "\n",
-    "eval_dataset = load_dataset(\"glue\",\"stsb\",split=\"validation\")\n",
+    "eval_dataset = load_dataset(\"glue\", \"stsb\", split=\"validation\")\n",
     "metric = load(\"glue\", \"stsb\")"
    ]
   },
@@ -315,7 +311,7 @@
     }
    ],
    "source": [
-    "def compute_sentence_similarity(sentence_1, sentence_2,pipeline):\n",
+    "def compute_sentence_similarity(sentence_1, sentence_2, pipeline):\n",
     "    embedding_1 = pipeline(sentence_1)\n",
     "    embedding_2 = pipeline(sentence_2)\n",
     "    # compute cosine similarity between two sentences\n",
@@ -323,13 +319,14 @@
     "\n",
     "\n",
     "def evaluate_stsb(example):\n",
-    "  default = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], vanilla_emb)\n",
-    "  quantized = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], q8_emb)\n",
-    "  return {\n",
-    "      \"reference\": (example[\"label\"] - 1) / (5 - 1), # rescale to [0,1]\n",
-    "      \"default\": float(default),\n",
-    "      \"quantized\": float(quantized),\n",
-    "  }\n",
+    "    default = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], vanilla_emb)\n",
+    "    quantized = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], q8_emb)\n",
+    "    return {\n",
+    "        \"reference\": (example[\"label\"] - 1) / (5 - 1),  # rescale to [0,1]\n",
+    "        \"default\": float(default),\n",
+    "        \"quantized\": float(quantized),\n",
+    "    }\n",
+    "\n",
     "\n",
     "result = eval_dataset.map(evaluate_stsb)"
    ]
@@ -353,9 +350,13 @@
     "default_acc = metric.compute(predictions=result[\"default\"], references=result[\"reference\"])\n",
     "quantized = metric.compute(predictions=result[\"quantized\"], references=result[\"reference\"])\n",
     "\n",
-    "print(\"vanilla model: pearson=\", default_acc['pearson'])\n",
-    "print(\"quantized model: pearson=\", quantized['pearson'])\n",
-    "print(\"The quantized model achieves \", round(quantized[\"pearson\"]/default_acc[\"pearson\"],2)*100, \"% accuracy of the fp32 model\")"
+    "print(\"vanilla model: pearson=\", default_acc[\"pearson\"])\n",
+    "print(\"quantized model: pearson=\", quantized[\"pearson\"])\n",
+    "print(\n",
+    "    \"The quantized model achieves \",\n",
+    "    round(quantized[\"pearson\"] / default_acc[\"pearson\"], 2) * 100,\n",
+    "    \"% accuracy of the fp32 model\",\n",
+    ")"
    ]
   },
  {