path: root/fine_tune
author    zhang <zch921005@126.com>    2022-08-22 22:24:15 +0800
committer zhang <zch921005@126.com>    2022-08-22 22:24:15 +0800
commit    2c19d4adf5df7281017fb7f5cfeb3f7083ea0d31 (patch)
tree      11af5b3f1cbf393a673c604847aa855a410000be /fine_tune
parent    94b6d3246c72eb3cae58a2fd18771e3c2c3e7cb2 (diff)
imports
Diffstat (limited to 'fine_tune')
-rw-r--r--  fine_tune/bert/tutorials/06_dive_into.py  49
1 file changed, 49 insertions, 0 deletions
diff --git a/fine_tune/bert/tutorials/06_dive_into.py b/fine_tune/bert/tutorials/06_dive_into.py
new file mode 100644
index 0000000..3ce3efa
--- /dev/null
+++ b/fine_tune/bert/tutorials/06_dive_into.py
@@ -0,0 +1,49 @@
+from bertviz.transformers_neuron_view import BertModel, BertConfig
+from transformers import BertTokenizer
+import torch
+import math
+
+import numpy as np
+
+# seed numpy and torch so the randomly initialised model below is reproducible
+np.random.seed(1234)
+torch.manual_seed(1234)
+
+max_length = 256
+# bert-base-uncased config with attentions and hidden states exposed; the position-embedding
+# table is shrunk to max_length since the model below is randomly initialised, not pretrained
+config = BertConfig.from_pretrained("bert-base-uncased", output_attentions=True, output_hidden_states=True, return_dict=True)
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+config.max_position_embeddings = max_length
+
+
+
+# tokenize the first 20-newsgroups training document as the probe input
+from sklearn.datasets import fetch_20newsgroups
+newsgroups_train = fetch_20newsgroups(subset='train')
+inputs_tests = tokenizer(newsgroups_train['data'][:1],
+                         truncation=True,
+                         padding=True,
+                         max_length=max_length,
+                         return_tensors='pt')
+with torch.no_grad():
+    # randomly initialised BERT (bertviz neuron-view variant); eval() disables dropout so the
+    # manual attention computation below matches what the model itself reports
+    model = BertModel(config)
+    model.eval()
+    embed_output = model.embeddings(inputs_tests['input_ids'], inputs_tests['token_type_ids'])
+    print(embed_output)
+    model_output = model(**inputs_tests)
+    # attention weights of layer 0, head 0, as reported by the model
+    print(model_output[-1][0]['attn'][0, 0, :, :])
+    # per-head dimensionality (hidden_size / num_attention_heads, i.e. 768 / 12 = 64 for bert-base)
+    att_head_size = int(model.config.hidden_size / model.config.num_attention_heads)
+    # project the embeddings of the first example with the first head's slice of the query/key weights
+    Q_first_head = embed_output[0] @ model.encoder.layer[0].attention.self.query.weight.T[:, :att_head_size] + \
+        model.encoder.layer[0].attention.self.query.bias[:att_head_size]
+    K_first_head = embed_output[0] @ model.encoder.layer[0].attention.self.key.weight.T[:, :att_head_size] + \
+        model.encoder.layer[0].attention.self.key.bias[:att_head_size]
+    # the additive padding mask is omitted here; with a single unpadded example it would be all zeros
+    # mod_attention = (1.0 - inputs_tests['attention_mask'][[0]]) * -10000.0
+    # scaled dot-product scores for head 0, softmaxed over the key dimension
+    attention_scores = torch.softmax((Q_first_head @ K_first_head.T) / math.sqrt(att_head_size), dim=-1)
+    print(attention_scores)
+
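+# Sanity check (a minimal sketch, assuming model_output[-1][0]['attn'] holds the layer-0
+# per-head attention probabilities as printed above): the manually computed head-0 weights
+# should agree with the model's own attention for head 0.
+print(torch.allclose(attention_scores, model_output[-1][0]['attn'][0, 0, :, :], atol=1e-5))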