diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd3193c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__/utils.cpython-38.pyc
+__pycache__/evaluate.cpython-38.pyc
+__pycache__/data_loader.cpython-38.pyc
+__pycache__/metrics.cpython-38.pyc
diff --git a/README.md b/README.md
index 4d97332..d98f91c 100644
--- a/README.md
+++ b/README.md
@@ -48,9 +48,8 @@ We randomly select 3000 samples from the training set as the validation set, and
 
 This repo was tested on Python 3.5+ and PyTorch 0.4.1/1.0.0. The requirements are:
 
-- tensorflow >= 1.11.0
-- torch >= 0.4.1
-- pytorch-pretrained-bert == 0.4.0
+- torch >= 1.10.0
+- transformers >= 4.12.0
 - tqdm
 - apex
 
@@ -84,34 +83,7 @@ Based on the best model on the validation set, we can get the recognition effect
 
 1. **Get BERT model for PyTorch**
 
-   There are two ways to get the pre-trained BERT model in a PyTorch dump for your experiments :
-
-   - **Direct download of the converted pytorch version of the BERT model**
-
-     You can download the pytorch dump I converted from the tensorflow checkpont from my Google Cloud Drive folder [`bert-base-chinese-pytorch`](https://drive.google.com/drive/folders/1K_xCYMCEfjpPjedSnMyL9zMVzqbanQX9), including the BERT parameters file `bert_config.json`, the model file `pytorch_model.bin` and the vocabulary file `vocab.txt`.
-
-   - **Convert the TensorFlow checkpoint to a PyTorch dump by yourself**
-
-     - Download the Google's BERT base model for Chinese from **[`BERT-Base, Chinese`](https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip)** (Chinese Simplified and Traditional, 12-layer, 768-hidden, 12-heads, 110M parameters), and decompress it.
-
-     - Execute the following command, convert the TensorFlow checkpoint to a PyTorch dump.
-
-       ```shell
-       export TF_BERT_BASE_DIR=/path/to/chinese_L-12_H-768_A-12
-       export PT_BERT_BASE_DIR=/path/to/NER-BERT-pytorch/bert-base-chinese-pytorch
-
-       pytorch_pretrained_bert convert_tf_checkpoint_to_pytorch \
-         $TF_BERT_BASE_DIR/bert_model.ckpt \
-         $TF_BERT_BASE_DIR/bert_config.json \
-         $PT_BERT_BASE_DIR/pytorch_model.bin
-       ```
-
-     - Copy the BERT parameters file `bert_config.json` and dictionary file `vocab.txt` to the directory `$PT_BERT_BASE_DIR`.
-
-       ```shell
-       cp $TF_BERT_BASE_DIR/bert_config.json $PT_BERT_BASE_DIR/bert_config.json
-       cp $TF_BERT_BASE_DIR/vocab.txt $PT_BERT_BASE_DIR/vocab.txt
-       ```
+   Nothing to do here: the `transformers` library downloads `bert-base-chinese` automatically the first time the model is loaded.
 
 2. **Build dataset and tags**
 
diff --git a/data_loader.py b/data_loader.py
index 9462ace..28f5b50 100644
--- a/data_loader.py
+++ b/data_loader.py
@@ -7,7 +7,7 @@
 
 import torch
-from pytorch_pretrained_bert import BertTokenizer
+from transformers import BertTokenizer
 
 import utils
diff --git a/evaluate.py b/evaluate.py
index fcf81bb..336b01d 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -8,7 +8,7 @@
 import numpy as np
 import torch
-from pytorch_pretrained_bert import BertForTokenClassification, BertConfig
+from transformers import BertForTokenClassification, BertConfig
 
 from metrics import f1_score
 from metrics import classification_report
@@ -45,6 +45,7 @@ def evaluate(model, data_iterator, params, mark='Eval', verbose=False):
         batch_masks = batch_data.gt(0)
 
         loss = model(batch_data, token_type_ids=None, attention_mask=batch_masks, labels=batch_tags)
+        loss = loss[0]
         if params.n_gpu > 1 and params.multi_gpu:
             loss = loss.mean()
         loss_avg.update(loss.item())
diff --git a/experiments/base_model/evaluate.log b/experiments/base_model/evaluate.log
index 41c2ccd..d1fb89b 100644
--- a/experiments/base_model/evaluate.log
+++ b/experiments/base_model/evaluate.log
@@ -28,3 +28,4 @@
 
 avg / total     94.54     94.73     94.63      5449
 
+2022-02-05 14:26:42,389:INFO: Loading the dataset...
diff --git a/experiments/base_model/train.log b/experiments/base_model/train.log
index 6dfea67..30623af 100644
--- a/experiments/base_model/train.log
+++ b/experiments/base_model/train.log
@@ -69,3 +69,19 @@
 2019-01-25 05:48:44,861:INFO: - Train metrics: loss: 00.00; f1: 99.90
 2019-01-25 05:49:07,503:INFO: - Val metrics: loss: 00.03; f1: 95.88
 2019-01-25 05:49:12,045:INFO: Best val f1: 95.90
+2022-02-05 14:27:16,705:INFO: device: cuda, n_gpu: 1, 16-bits training: False
+2022-02-05 14:27:16,705:INFO: Loading the datasets...
+2022-02-05 14:27:45,285:INFO: device: cuda, n_gpu: 1, 16-bits training: False
+2022-02-05 14:27:45,285:INFO: Loading the datasets...
+2022-02-05 14:28:39,517:INFO: Starting training for 20 epoch(s)
+2022-02-05 14:28:39,517:INFO: Epoch 1/20
+2022-02-05 14:34:00,740:INFO: device: cuda, n_gpu: 1, 16-bits training: False
+2022-02-05 14:34:00,741:INFO: Loading the datasets...
+2022-02-05 14:34:54,650:INFO: Starting training for 20 epoch(s)
+2022-02-05 14:34:54,651:INFO: Epoch 1/20
+2022-02-05 14:40:08,016:INFO: device: cuda, n_gpu: 1, 16-bits training: False
+2022-02-05 14:40:08,017:INFO: Loading the datasets...
+2022-02-05 14:41:01,202:INFO: Starting training for 20 epoch(s)
+2022-02-05 14:41:01,202:INFO: Epoch 1/20
+2022-02-05 14:46:01,254:INFO: device: cuda, n_gpu: 1, 16-bits training: False
+2022-02-05 14:46:01,254:INFO: Loading the datasets...
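Note (not part of the patch): the `loss = loss[0]` lines added to `evaluate.py` and `train.py` are needed because `pytorch_pretrained_bert.BertForTokenClassification` returned the loss tensor directly when `labels` were passed, whereas `transformers` returns a tuple-like `TokenClassifierOutput` whose first element is the loss and whose second is the per-token logits. A minimal sketch of that behaviour, using a tiny randomly initialised config so it runs without downloading any weights (the sizes and `num_labels=7` are illustrative assumptions, not values taken from this repo):

```python
import torch
from transformers import BertConfig, BertForTokenClassification

# Tiny randomly initialised model so the sketch runs offline;
# the repo itself loads bert-base-chinese instead.
config = BertConfig(vocab_size=100, hidden_size=32, num_hidden_layers=2,
                    num_attention_heads=2, intermediate_size=64, num_labels=7)
model = BertForTokenClassification(config)

input_ids = torch.randint(0, 100, (2, 8))      # dummy batch of token ids
attention_mask = torch.ones_like(input_ids)
labels = torch.zeros_like(input_ids)           # dummy tag ids

outputs = model(input_ids, token_type_ids=None,
                attention_mask=attention_mask, labels=labels)
loss = outputs[0]    # what the added `loss = loss[0]` lines pick out
logits = outputs[1]  # per-token scores, shape (2, 8, 7)
print(loss.item(), logits.shape)
```

The same indexing works whether the model returns a plain tuple (`return_dict=False`) or a `ModelOutput` object, which is why the single `loss[0]` line suffices in both training and evaluation.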
diff --git a/requirements.txt b/requirements.txt
index 2ae8242..47d048e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,8 @@
-# TensorFlow
-tensorflow >= 1.11.0
 # PyTorch
-torch >= 0.4.1
+torch >= 1.10.0
 # progress bars in model download and training scripts
 tqdm
 # A PyTorch implementation of Google AI's BERT model
-pytorch-pretrained-bert == 0.4.0
+transformers >= 4.12.0
 # A tool for easy mixed precision and distributed training in Pytorch, https://github.com/NVIDIA/apex
-apex
\ No newline at end of file
+apex
diff --git a/train.py b/train.py
index 5c28104..e2fedf6 100644
--- a/train.py
+++ b/train.py
@@ -11,7 +11,7 @@
 from torch.optim.lr_scheduler import LambdaLR
 from tqdm import trange
 
-from pytorch_pretrained_bert import BertForTokenClassification
+from transformers import BertForTokenClassification
 
 from data_loader import DataLoader
 from evaluate import evaluate
@@ -20,7 +20,7 @@
 parser = argparse.ArgumentParser()
 parser.add_argument('--data_dir', default='data/msra', help="Directory containing the dataset")
-parser.add_argument('--bert_model_dir', default='bert-base-chinese-pytorch', help="Directory containing the BERT model in PyTorch")
+parser.add_argument('--bert_model_dir', default='bert-base-chinese', help="Directory containing the BERT model in PyTorch")
 parser.add_argument('--model_dir', default='experiments/base_model', help="Directory containing params.json")
 parser.add_argument('--seed', type=int, default=2019, help="random seed for initialization")
 parser.add_argument('--restore_file', default=None,
@@ -51,7 +51,7 @@ def train(model, data_iterator, optimizer, scheduler, params):
         # compute model output and loss
         loss = model(batch_data, token_type_ids=None, attention_mask=batch_masks, labels=batch_tags)
-
+        loss = loss[0]
         if params.n_gpu > 1 and args.multi_gpu:
             loss = loss.mean()  # mean() to average on multi-gpu
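Closing note on the `bert-base-chinese` default (an illustration, not part of the patch): because `--bert_model_dir` is now a Hugging Face Hub model id rather than a local directory, `from_pretrained` downloads and caches the checkpoint on first use, which is why the README's manual download and conversion steps could be dropped. Roughly what the loading now amounts to, where `num_labels=7` stands in for the size of the repo's tag set and is an assumption here:

```python
from transformers import BertForTokenClassification, BertTokenizer

# First call downloads bert-base-chinese from the Hugging Face Hub (~400 MB)
# and caches it locally; later runs reuse the cache, so no manual
# TF-checkpoint conversion or Google Drive download is needed.
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
model = BertForTokenClassification.from_pretrained('bert-base-chinese',
                                                    num_labels=7)

print(tokenizer.vocab_size)  # 21128 entries in the Chinese WordPiece vocab
print(model.num_labels)      # 7
```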