Skip to content
This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

Permalink
Update conda meta (#415)
Browse files Browse the repository at this point in the history
  • Loading branch information
VincyZhang authored Nov 23, 2022
1 parent dec0997 commit 59544b0
Show file tree
Hide file tree
Showing 19 changed files with 3,679 additions and 42 deletions.
3 changes: 2 additions & 1 deletion conda_meta/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ build:
script_env:
- IMEX_WHL
number: {{buildnumber}}
noarch: python
script: pip install --no-deps {{IMEX_WHL}}
requirements:
build:
Expand All @@ -19,6 +18,8 @@ requirements:
- numpy
- transformers
- packaging
- neural_compressor
- protobuf
test:
imports:
- intel_extension_for_transformers
Expand Down
188 changes: 188 additions & 0 deletions docs/tutorials/pytorch/language-modeling/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import logging
import os
from datasets import load_dataset, load_metric
from itertools import chain
from intel_extension_for_transformers import metrics, OptimizedModel
from intel_extension_for_transformers.optimization.trainer import NLPTrainer
from argparse import ArgumentParser
from transformers import (
MODEL_FOR_MASKED_LM_MAPPING,
AutoConfig,
AutoModelForMaskedLM,
AutoModelForMultipleChoice,
AutoTokenizer,
DataCollatorForLanguageModeling,
TrainingArguments,
is_torch_tpu_available,
set_seed,
)

# Set WANDB_DISABLED before any trainer is created (presumably keeps
# Weights & Biases reporting off for the benchmark run -- confirm).
os.environ.update(WANDB_DISABLED="true")

logger = logging.getLogger(__name__)

# Config classes registered for masked-LM models and their model-type strings.
MODEL_CONFIG_CLASSES = [*MODEL_FOR_MASKED_LM_MAPPING.keys()]
MODEL_TYPES = tuple(
    config_cls.model_type for config_cls in MODEL_CONFIG_CLASSES
)

# Command-line interface: the precision to benchmark and the model to load.
arg_parser = ArgumentParser(description="Parse args")
arg_parser.add_argument(
    "--data_type",
    help="data type of model",
    default="int8",
)
arg_parser.add_argument(
    "--model_name_or_path",
    help="input model for benchmark",
    default="bert-base-uncased",
)
args = arg_parser.parse_args()

# Dataset (name and configuration) used for the benchmark.
dataset_name = "wikitext"
dataset_config_name = "wikitext-2-raw-v1"

# Trainer settings for the benchmark: CPU only (no_cuda) with an evaluation
# batch size of 1.
training_args = TrainingArguments(
    # Fixed typo: this previously read `args.mpdel_name_or_path`, which does
    # not exist on the parsed namespace and raised AttributeError at startup.
    output_dir=args.model_name_or_path,
    do_eval=True,
    do_train=True,
    no_cuda=True,
    per_device_eval_batch_size=1,
    overwrite_output_dir=True,
)

# Load the benchmark dataset, then the config and tokenizer that belong to
# the model given on the command line.
raw_datasets = load_dataset(dataset_name, dataset_config_name)
config = AutoConfig.from_pretrained(args.model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
# Set seed before initializing model.
set_seed(training_args.seed)

## start with int8 benchmarking
if args.data_type == "int8":
    # Load the model obtained after Intel Neural Compressor (INC) quantization
    model = OptimizedModel.from_pretrained(
        args.model_name_or_path,
        # Treat paths containing ".ckpt" as TensorFlow checkpoints.
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        revision="main",
        use_auth_token=None,
    )
else:
    # Any --data_type other than "int8" ends up here.
    ## original fp32 model benchmarking
    model = AutoModelForMaskedLM.from_pretrained(
        args.model_name_or_path,
        config=config,
        revision="main",
        use_auth_token=None,
    )
    # NOTE(review): the extracted source lost its indentation; this call is
    # assumed to belong to the fp32 branch only -- confirm against the repo.
    model.resize_token_embeddings(len(tokenizer))

# First we tokenize all the texts.
# Column names come from the train split when training is enabled,
# otherwise from the validation split.
column_names = raw_datasets[
    "train" if training_args.do_train else "validation"
].column_names
# Prefer a column literally named "text"; fall back to the first column.
text_column_name = column_names[0] if "text" not in column_names else "text"

# Chunk length used later when grouping the tokenized texts.
max_seq_length = tokenizer.model_max_length


def tokenize_function(examples):
    """Tokenize the text column of a batch and request the special-tokens mask."""
    texts = examples[text_column_name]
    return tokenizer(texts, return_special_tokens_mask=True)


# Columns of the train split; they are dropped once tokenization has run.
column_names = raw_datasets["train"].column_names
text_column_name = column_names[0] if "text" not in column_names else "text"

# Tokenize every split in batches.
with training_args.main_process_first(desc="dataset map tokenization"):
    tokenized_datasets = raw_datasets.map(
        tokenize_function,
        remove_columns=column_names,
        batched=True,
        desc="Running tokenizer on every text in dataset",
        load_from_cache_file=True,
    )


# Main data processing function that will concatenate all texts from our dataset and generate chunks of max_seq_length.
def group_texts(examples):
    """Concatenate each column of a batch and cut it into max_seq_length chunks.

    When the concatenated length reaches max_seq_length, the tail that does
    not fill a whole chunk is dropped; a shorter batch is kept as one
    (short) chunk.
    """
    # Flatten every column into a single long list.
    merged = {key: list(chain(*examples[key])) for key in examples.keys()}
    first_key = list(examples.keys())[0]
    usable = len(merged[first_key])
    # Trim to a whole number of chunks; padding is deliberately not used.
    if usable >= max_seq_length:
        usable -= usable % max_seq_length
    chunk_starts = range(0, usable, max_seq_length)
    return {
        key: [tokens[start: start + max_seq_length] for start in chunk_starts]
        for key, tokens in merged.items()
    }


# Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a
# remainder for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value
# might be slower to preprocess.

# Regroup the tokenized texts into chunks, batch by batch.
with training_args.main_process_first(desc="grouping texts together"):
    tokenized_datasets = tokenized_datasets.map(
        group_texts,
        desc=f"Grouping texts in chunks of {max_seq_length}",
        load_from_cache_file=True,
        batched=True,
    )

# Pick the splits requested by the training arguments and fail early when
# a requested split is absent.
if training_args.do_train:
    if "train" in tokenized_datasets:
        train_dataset = tokenized_datasets["train"]
    else:
        raise ValueError("--do_train requires a train dataset")

if training_args.do_eval:
    if "validation" in tokenized_datasets:
        eval_dataset = tokenized_datasets["validation"]
    else:
        raise ValueError("--do_eval requires a validation dataset")


def preprocess_logits_for_metrics(logits, labels):
    """Reduce logits to predicted ids by taking the argmax over the last axis.

    `labels` is accepted but not used.
    """
    # Depending on the model and config, logits may contain extra tensors,
    # like past_key_values, but logits always come first
    primary = logits[0] if isinstance(logits, tuple) else logits
    return primary.argmax(dim=-1)


# Accuracy metric used by compute_metrics below.
metric = load_metric("accuracy")


def compute_metrics(eval_preds):
    """Compute the metric over positions whose label is not -100."""
    predictions, references = eval_preds
    # preds have the same shape as the labels, after the argmax(-1) has been calculated
    # by preprocess_logits_for_metrics
    flat_references = references.reshape(-1)
    flat_predictions = predictions.reshape(-1)
    # Positions labelled -100 are excluded from the score.
    keep = flat_references != -100
    return metric.compute(
        predictions=flat_predictions[keep],
        references=flat_references[keep],
    )

# Data collator will take care of randomly masking the tokens.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    pad_to_multiple_of=None,
    mlm_probability=0.15,
)

# Initialize the Trainer
set_seed(training_args.seed)
trainer = NLPTrainer(
    model=model,
    args=training_args,
    train_dataset=None if not training_args.do_train else train_dataset,
    eval_dataset=None if not training_args.do_eval else eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    # Metric hooks are only attached when evaluating and not on TPU.
    compute_metrics=(
        None
        if (not training_args.do_eval or is_torch_tpu_available())
        else compute_metrics
    ),
    preprocess_logits_for_metrics=(
        None
        if (not training_args.do_eval or is_torch_tpu_available())
        else preprocess_logits_for_metrics
    ),
)

# Run evaluation once and report batch size, loss, latency and throughput.
results = trainer.evaluate()
# NOTE(review): this list is not referenced anywhere in the visible script.
bert_task_acc_keys = ['eval_loss', 'eval_f1', 'eval_accuracy', 'eval_matthews_correlation',
                      'eval_pearson', 'eval_mcc', 'eval_spearmanr']

throughput = results.get("eval_samples_per_second")
eval_loss = results["eval_loss"]
print('Batch size = {}'.format(training_args.per_device_eval_batch_size))
print("Finally Eval eval_loss Accuracy: {}".format(eval_loss))
# `.get` may return None (key absent) and the value could be 0; dividing by
# either would crash, so the speed lines are only printed for a usable value.
if throughput:
    print("Latency: {:.3f} ms".format(1000 / throughput))
    print("Throughput: {} samples/sec".format(throughput))
else:
    logger.warning(
        "eval_samples_per_second is missing or zero in the evaluation "
        "results; skipping latency/throughput report"
    )
78 changes: 70 additions & 8 deletions docs/tutorials/pytorch/language-modeling/bert-base-uncased.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"id": "c1816be1",
"metadata": {},
"source": [
"* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. "
"* Follow [installation](https://github.com/intel/intel_extension_for_transformers#installation) to install **intel-extension-for-transformers**. "
]
},
{
Expand Down Expand Up @@ -167,7 +167,7 @@
" dataset_config_name=\"wikitext-2-raw-v1\",\n",
")\n",
"training_args = TrainingArguments(\n",
" output_dir=\"./saved_results\",\n",
" output_dir=\"./saved_results_static\",\n",
" do_eval=True,\n",
" do_train=True,\n",
" no_cuda=True,\n",
Expand Down Expand Up @@ -367,8 +367,6 @@
" else None,\n",
")\n",
"\n",
"trainer_ptq_static.save_model(\"./saved_results_ptq_static\") # quantized model\n",
"\n",
"tune_metric = metrics.Metric(\n",
" name=\"eval_loss\", # Metric used for the tuning strategy.\n",
" is_relative=True, # Metric tolerance mode, True is for relative, otherwise for absolute.\n",
Expand All @@ -381,7 +379,11 @@
")\n",
"\n",
"# run quantization\n",
"trainer_ptq_static.quantize(quant_config=quantization_config)"
"trainer_ptq_static.quantize(quant_config=quantization_config)\n",
"\n",
"# save quantized model\n",
"trainer_ptq_static.save_model(\"./saved_results_static\")\n",
"model.config.save_pretrained(\"./saved_results_static\")"
]
},
{
Expand Down Expand Up @@ -414,6 +416,26 @@
"print(\"Throughput: {} samples/sec\".format(throughput_ptq_static))"
]
},
{
"cell_type": "markdown",
"id": "5a7e93de",
"metadata": {},
"source": [
"## Run Benchmark after Static Post Training Quantization with Multi-Instance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a6795aa",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.system('numactl --hardware')\n",
"results = os.system('bash ../multi_instance.sh --model=saved_results_static --core_per_instance=4 --data_type=int8')"
]
},
{
"cell_type": "markdown",
"id": "7a51f6ca",
Expand All @@ -431,6 +453,7 @@
"source": [
"# Initialize the Trainer\n",
"set_seed(training_args.seed)\n",
"training_args.output_dir = \"./saved_results_dynamic\"\n",
"trainer_ptq_dynamic = NLPTrainer(\n",
" model=model,\n",
" args=training_args,\n",
Expand All @@ -444,8 +467,6 @@
" else None,\n",
")\n",
"\n",
"trainer_ptq_dynamic.save_model(\"./saved_results_ptq_dynamic\")\n",
"\n",
"tune_metric = metrics.Metric(\n",
" name=\"eval_loss\", \n",
" is_relative=True,\n",
Expand All @@ -458,7 +479,10 @@
")\n",
"\n",
"# run quantization\n",
"trainer_ptq_dynamic.quantize(quant_config=quantization_config)"
"trainer_ptq_dynamic.quantize(quant_config=quantization_config)\n",
"\n",
"# save quantized model\n",
"trainer_ptq_dynamic.save_model(\"./saved_results_dynamic\")"
]
},
{
Expand Down Expand Up @@ -487,6 +511,25 @@
"print(\"Throughput: {} samples/sec\".format(throughput_ptq_dynamic))"
]
},
{
"cell_type": "markdown",
"id": "5a7e93de",
"metadata": {},
"source": [
"## Run Benchmark after Dynamic Post Training Quantization with Multi-Instance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea631f92",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"results = os.system('bash ../multi_instance.sh --model=saved_results_dynamic --core_per_instance=4 --data_type=int8')"
]
},
{
"cell_type": "markdown",
"id": "44cca2a1",
Expand Down Expand Up @@ -527,6 +570,25 @@
"print(\"Latency: {:.3f} ms\".format(1000 / throughput_fp32))\n",
"print(\"Throughput: {} samples/sec\".format(throughput_fp32))"
]
},
{
"cell_type": "markdown",
"id": "5a7e93de",
"metadata": {},
"source": [
"## Run Benchmark for FP32 Model with Multi-Instance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "571317cf",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"results = os.system('bash ../multi_instance.sh --model=bert-base-uncased --core_per_instance=4 --data_type=fp32')"
]
}
],
"metadata": {
Expand Down
Loading

0 comments on commit 59544b0

Please sign in to comment.