-rw-r--r--  basics/python/circular/a.py                 13
-rw-r--r--  basics/python/circular/b.py                 19
-rw-r--r--  basics/python/circular/c.py                 10
-rw-r--r--  fine_tune/bert/tutorials/06_dive_into.py    49
4 files changed, 91 insertions, 0 deletions
diff --git a/basics/python/circular/a.py b/basics/python/circular/a.py
new file mode 100644
index 0000000..b32c70d
--- /dev/null
+++ b/basics/python/circular/a.py
@@ -0,0 +1,13 @@
+
+
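+# B is only needed here for the type hint on A.foo, so it is imported inside a
+# TYPE_CHECKING guard; at runtime a.py does not import b.py, so no cycle forms.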
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from b import B
+
+
+class A:
+    def foo(self, b: 'B'):
+        return 2*b.foo()
+
+    def t(self):
+        print('A.t()')
diff --git a/basics/python/circular/b.py b/basics/python/circular/b.py
new file mode 100644
index 0000000..643e2f7
--- /dev/null
+++ b/basics/python/circular/b.py
@@ -0,0 +1,19 @@
+
+
+# from typing import TYPE_CHECKING
+# if TYPE_CHECKING:
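+# b.py imports A for real (at runtime); the cycle is broken because a.py only
+# imports B under TYPE_CHECKING, so no circular-import error occurs.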
+from a import A
+
+
+class B:
+
+    def __init__(self):
+        self.a_items = []
+
+    def append(self, a: 'A'):
+        a.t()
+        self.a_items.append(a)
+
+    def foo(self):
+        return 5
+
diff --git a/basics/python/circular/c.py b/basics/python/circular/c.py
new file mode 100644
index 0000000..b17402d
--- /dev/null
+++ b/basics/python/circular/c.py
@@ -0,0 +1,10 @@
+
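+# importing B also pulls in a.py (b.py does a runtime `from a import A`);
+# since a.py only imports b.py for type checking, no circular import is hit.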
+from b import B
+from a import A
+
+
+a_inst = A()
+b_inst = B()
+
+print(a_inst.foo(b_inst))
+b_inst.append(a_inst)
diff --git a/fine_tune/bert/tutorials/06_dive_into.py b/fine_tune/bert/tutorials/06_dive_into.py
new file mode 100644
index 0000000..3ce3efa
--- /dev/null
+++ b/fine_tune/bert/tutorials/06_dive_into.py
@@ -0,0 +1,49 @@
+from bertviz.transformers_neuron_view import BertModel, BertConfig
+from transformers import BertTokenizer
+import torch
+import math
+
+import numpy as np
+
+np.random.seed(1234)
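+# the BertModel built below is randomly initialized via torch's RNG (no
+# pretrained weights are loaded), so seed torch as well for reproducible output
+torch.manual_seed(1234)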
+
+max_length = 256
+config = BertConfig.from_pretrained("bert-base-uncased", output_attentions=True, output_hidden_states=True, return_dict=True)
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+config.max_position_embeddings = max_length
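+# shrinking the position-embedding table is fine here because BertModel(config)
+# below builds fresh weights rather than loading the pretrained checkpoint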
+
+
+
+from sklearn.datasets import fetch_20newsgroups
+newsgroups_train = fetch_20newsgroups(subset='train')
+inputs_tests = tokenizer(newsgroups_train['data'][:1],
+                         truncation=True,
+                         padding=True,
+                         max_length=max_length,
+                         return_tensors='pt')
+# print(inputs_tests['input_ids'])
+# with torch.no_grad():
+# model = BertModel(config)
+# # print(config)
+# embed_output = model.embeddings(inputs_tests['input_ids'], inputs_tests['token_type_ids'], )
+# model_output = model(**inputs_tests)
+# print(embed_output)
+# print(model_output[-1][0]['attn'][0, 0, :, :])
+
+# print(inputs_tests['input_ids'])
+with torch.no_grad():
+    model = BertModel(config)
+    # print(config)
+    embed_output = model.embeddings(inputs_tests['input_ids'], inputs_tests['token_type_ids'])
+    print(embed_output)
+    model_output = model(**inputs_tests)
+    print(model_output[-1][0]['attn'][0, 0, :, :])
+    att_head_size = int(model.config.hidden_size / model.config.num_attention_heads)
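+    # nn.Linear stores weight as (out_features, in_features); transposing and taking
+    # the first att_head_size columns selects attention head 0's Q/K projection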
+    Q_first_head = embed_output[0] @ model.encoder.layer[0].attention.self.query.weight.T[:, :att_head_size] + \
+        model.encoder.layer[0].attention.self.query.bias[:att_head_size]
+    K_first_head = embed_output[0] @ model.encoder.layer[0].attention.self.key.weight.T[:, :att_head_size] + \
+        model.encoder.layer[0].attention.self.key.bias[:att_head_size]
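+    # scaled dot-product attention for layer 0, head 0; with a single unpadded
+    # sequence the attention mask is all ones, so the commented-out mask term can
+    # be skipped and the result should match the 'attn' slice printed above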
+    # mod_attention = (1.0 - inputs_tests['attention_mask'][[0]]) * -10000.0
+    attention_scores = torch.nn.Softmax(dim=-1)((Q_first_head @ K_first_head.T) / math.sqrt(att_head_size))
+    print(attention_scores)
+