# misc/process.py -- as added by commit 2fe442c ("update", zhang, 2022-10-23).
"""Interleave and de-interleave line-aligned Chinese/English subtitle files.

``merge`` combines ``raw_chinese.txt`` and ``raw_english.txt`` into a single
``merge_<stamp>.txt`` with alternating Chinese/English rows; ``split`` turns
such a file back into ``chinese_<stamp>.txt`` and ``english_<stamp>.txt``.
All paths are relative to the current working directory.
"""

from datetime import datetime

# Commas in Chinese rows become spaces.
# NOTE(review): this copy of the commit shows two identical ASCII-comma
# replacements chained together; the second is presumed to have been the
# full-width comma (U+FF0C) before the text was normalised -- confirm against
# the repository. Both forms are handled here.
_COMMA_TO_SPACE = str.maketrans({',': ' ', '\uff0c': ' '})


def _date_stamp(today=None):
    """Return *today* unchanged, or the current local date as ``YYYYMMDD``."""
    if today is not None:
        return today
    return datetime.now().strftime('%Y%m%d')


def merge(today=None):
    """Write ``./merge_<today>.txt`` from the two raw subtitle files.

    For each pair of rows (pairing stops at the end of the shorter file) the
    Chinese row is written first, with commas replaced by spaces, runs of
    whitespace collapsed to single spaces and a newline appended; the English
    row follows exactly as read.

    Args:
        today: Stamp used in the output file name. Defaults to the current
            local date formatted as ``YYYYMMDD``.

    Raises:
        OSError: If an input file cannot be read or the output file cannot be
            written.
    """
    stamp = _date_stamp(today)
    # Inputs are opened before the output so a missing input does not leave
    # an empty merge file behind.
    with open('./raw_chinese.txt', encoding='utf-8') as chinese_in, \
            open('./raw_english.txt', encoding='utf-8') as english_in, \
            open('./merge_{}.txt'.format(stamp), 'w',
                 encoding='utf-8') as merged_out:
        for c_row, e_row in zip(chinese_in, english_in):
            cleaned = ' '.join(c_row.translate(_COMMA_TO_SPACE).split())
            merged_out.write(cleaned + '\n')
            merged_out.write(e_row)


def split(today=None):
    """Split ``./merge_<today>.txt`` back into per-language files.

    Rows at even (0-based) positions go to ``./chinese_<today>.txt`` and rows
    at odd positions go to ``./english_<today>.txt``; rows are copied
    unchanged.

    Args:
        today: Stamp used in the input and output file names. Defaults to the
            current local date formatted as ``YYYYMMDD``.

    Raises:
        OSError: If the merge file cannot be read or an output file cannot be
            written.
    """
    stamp = _date_stamp(today)
    with open('./merge_{}.txt'.format(stamp), encoding='utf-8') as merged_in, \
            open('./chinese_{}.txt'.format(stamp), 'w',
                 encoding='utf-8') as chinese_out, \
            open('./english_{}.txt'.format(stamp), 'w',
                 encoding='utf-8') as english_out:
        for index, row in enumerate(merged_in):
            target = chinese_out if index % 2 == 0 else english_out
            target.write(row)


if __name__ == '__main__':
    today = datetime.now().strftime('%Y%m%d')
    # Only the split step runs; merge(today) was disabled in the original
    # script and must be run separately to (re)create merge_<today>.txt.
    split(today)
+大猩猩(Capuchin monkeys)这样的灵长类动物就可以顺利完成 +这说明动物具有一种与生俱来的认知构架 +这种认知构架使得他们能够 +从小数据中寻找解决问题的通用范式 +认知构架的优势在人类中体现得尤为明显 +比如,对于稍微复杂的几何问题 +亚马逊雨林中的原始部落人群(Amazonian indigene group) +仍然能够轻易解决 +然而,以Transformer为代表的深度学习大模型(foundation model) +却在类似的测试中相形见绌 +模型在训练时不仅需要大量的标注数据 +且最终性能也无法与人类做比较 +对于智能水平的衡量 +一般是基于智力商数进行的 +也就是我们常说的“智商”或者“IQ” +心理学家创造了一系列测试 +来数值化智商 +并发现智商的高低 +与人的成就具有很高的相关性 +在这些测试之中 +比较有代表性的就是瑞文测试(Raven's Progressive Matrices) +下面这个题目就是瑞文测试中的一个样例 +这个例子乍一看很复杂 +只有8张图片且物体形态各异 +但仔细分析可以发现 +每行中物体颜色都是深灰、浅灰和黑色 +同时,每张图片中物体大小基本一致 +由此,不难推出正确答案 +离群样本选择(Odd-One-Out) +则需要被试从几个例子中挑出一个离群的数据点 +比如下题中 +只有第三张图片中有一个深黑色的六边形 +对于传统的感知智能体 +我们需要提供成千上万个例子 +机器才能学会一个猫或者狗的概念 +但是对于一个认知智能体 +仅依赖几张图片,机器就能从一个巨大的空间中抽象出对应的事件, +并理解其在时间-空间-因果三个层次上的关系 +探索具有人类认知智能能力的模型是 +北京通用人工智能研究院的一个基础研究项目 +集结了北京通用人工智能研究院和UCLA的学者 +共同解决这个挑战性的问题 +如何用小数据理解IQ测试中的时空因果关系 +经过数年的研究 +我们提出了通慧(Tong-Hui)模型 +这个夏天,我们邀请了来自国内 +顶尖学府的同学与我们的通慧(Tong-Hui)模型 +进行一次比拼 +在之前的测试中 +我们对模型的能力有一个大致的估计 +但是当面对真正高智商的人类对手时 +我们也摸不准我们的模型到底有怎样的表现 +好,那我现在点开始 +点开始之后大家就可以做了 +同学们时常有着各种各样的奇思妙想 +但我们的程序可能不一定能产生类似的思路 +所以对于比赛的结果 +总有一些不确定性 +开始觉得还挺简单的 +然后后面就感觉有一点吃力了 +然后有时候没有什么头绪 +错了六七个吧 +难以找到正确的思路的话,就会不断地错,就一直找不出规律 +然后就不知道该填啥 +就是我需要花费很多时间 +去想我该朝哪个方向去思考 +但是机器我觉得它可以短时间 +内迅速地尝试各种的、很多很多种情况 +好,谢谢大家 +通慧模型完胜所有的学生 +也超过了以Transformer为代表的大模型 +第一项左上角是一个五边形 +其他的是没有五边形的 +我们在这项任务中打败了全国最优秀的同学 +下一步的目标是为人工智能的定级 +提供更加有力的标准 +并在更加全面的环境下评估我们的通用人工智能系统 +我们曾经一直在想 +如果真的有那么一天我们创造的智能 +能够超过世界上最聪明的大脑 +那我们一定是发现了某种通用的算法 +乃至一整套全新的认知架构 +也许我们现在就已经站在 +通用人工智能的门口 +这次比赛的成功 +让我们离通用人工智能又迈进了一步 \ No newline at end of file diff --git a/misc/raw_english.txt b/misc/raw_english.txt new file mode 100644 index 0000000..15068c4 --- /dev/null +++ b/misc/raw_english.txt @@ -0,0 +1,105 @@ +You can start by clicking the "start" button. +You will not know what to fill in +if you don't have the correct way of thinking. +Sometimes I don’t have any ideas. +To get a high score, +testers need to reason abstractly +from the information they extracted +from small samples. +A high score cannot be obtained by just doing a lot of exercises. 
+Towards general artificial intelligence. +Exploring cognitive intelligence with human-level intelligence quotient. +"Artificial intelligence" has always been a window for humans +to explore the boundaries of their capabilities. +In recent years, significant progress in artificial intelligence +represented by deep learning has been made at the perception level, +but there is still a long way for existing models +to achieve intelligence with general human-level cognitive capabilities. +Research has shown that in the case of determining +whether two figures are alike, +primates like capuchin monkeys can do it successfully. +This indicates that animals have an innate cognitive architecture +that allows them to find generic paradigms +for solving problems from small data. +These advantages of cognitive framing are particularly evident in humans. +For slightly more complex geometric problems, for instance, +the Amazonian indigene group in the rainforest +can still solve them easily. +However, the deep learning foundation model represented by Transformer +is dwarfed in similar tests +not only does the model require a large amount of labeled data for training, +but its ultimate performance cannot be comparable to that of humans. +Intelligence levels are generally measured +based on intelligence quotients, +or "IQ" as it is often called. +Psychologists have created a series of tests +to numerically quantify IQ +and have found that IQ level has a high correlation +with human achievement. +Among these tests, +a representative one is Raven's Progressive Matrices. +The following question is an example. +This example is complicated at first glance, +which has only 8 pictures and the shapes of objects are different. +However, a closer analysis shows that +the objects in each row are all dark gray, light gray and black, +and the size of the objects in each picture is basically the same. +Thus, it is not difficult to find the correct answer. 
+In the case of Odd-One-Out, on the other hand, +subjects are required to pick an outlier data point from several examples. +For example, in the next question, +only the third picture has a dark black hexagon. +For traditional perceptual intelligences, +we need to provide thousands of examples +for the machine to learn the concept of a cat or a dog. +For a cognitive intelligent agent, +however, the machine can abstract the corresponding events from a huge space +from just a few pictures and understand their spatial-temporal-causal relationships. +Exploring models with human cognitive intelligence is +a fundamental research project of the Beijing Institute of General Artificial Intelligence (BIGAI), +in which scholars from BIGAI and UCLA cooperate to address this challenging problem: +how to use small data to understand +spatial-temporal-causal relationships in IQ tests. +After several years of study, +we proposed the Tong-Hui model. +In this summer, we invited students from +top universities in China to +have a competition with our Tong-Hui model. +In the preliminary tests, +we had a rough estimate of the capabilities of the model. +But when faced with truly highly intelligent human opponents, +we were not sure how our model would perform. +All right, I will click the "start" button +to begin the competition. +Students often have a variety of wondrous ideas, +but our program may not have similar thoughts. +Thus, we are not sure +about the result of the competition. +It was quite easy at the beginning, +then it was a little bit tough, +and later I had no idea. +I made 6 to 7 mistakes. +One would make mistakes if he cannot find the correct way of thinking and the hidden pattern, +and he would not know what to fill in. +We needed to spend +a lot of time thinking, +but the machine could quickly +try various solutions in a short period of time. +OK, thank you all. +The Tong Hui model outperformed all the students +and the foundation model represented by Transformer. 
+The first item in the upper left corner is a pentagon, +the others do not have pentagons. +We have beaten the best students in the country in this task. +Our next step is to provide more robust criteria +for the grading of AI +and to evaluate our general AI systems in a more comprehensive setting. +We were always thinking that +if we ever really created intelligence that +could outperform the world's smartest brains, +we must have discovered some kind of universal algorithm +or even a whole new cognitive architecture. +Perhaps we are already on +the doorstep of general AI right now, +and the success of this competition +will be a further step to it. \ No newline at end of file -- cgit v1.2.3