From 7cbbd04247f24cac4baa0e8d0f0979aa058856e7 Mon Sep 17 00:00:00 2001
From: zhang <zch921005@126.com>
Date: Sat, 29 Feb 2020 23:07:55 +0800
Subject: =?UTF-8?q?numpy=E5=AE=9E=E7=8E=B0t=E6=A3=80=E9=AA=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 stats/t-test.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 stats/t-test.py

(limited to 'stats/t-test.py')

diff --git a/stats/t-test.py b/stats/t-test.py
new file mode 100644
index 0000000..3ebba84
--- /dev/null
+++ b/stats/t-test.py
@@ -0,0 +1,50 @@
+## Import the packages
+import numpy as np
+from scipy import stats
+
+
+## Define 2 random samples (no seed is set, so every run draws new data)
+# Sample size (the same for both groups)
+N = 10
+# Gaussian distributed data with mean = 2 and var = 1
+a = np.random.randn(N) + 2
+# Gaussian distributed data with mean = 0 and var = 1
+b = np.random.randn(N)
+
+
+## Calculate the pooled standard deviation
+# Start from the sample variance of each group.
+
+# The unbiased variance estimate divides by N-1 (Bessel's correction), hence ddof=1; the maximum-likelihood estimate divides by N and is biased.
+var_a = a.var(ddof=1)
+var_b = b.var(ddof=1)
+
+# Pooled std deviation; the plain average of the variances is valid because both samples have size N
+s = np.sqrt((var_a + var_b)/2)
+print(a)
+print(b)
+
+
+
+## Calculate the t-statistic (2.0 keeps the division floating-point on any Python version)
+t = (a.mean() - b.mean())/(s*np.sqrt(2.0/N))
+
+
+
+## Compute the p-value from Student's t distribution
+# Degrees of freedom for two samples of size N
+df = 2*N - 2
+
+# One-tailed p-value: upper-tail area beyond |t|. abs() keeps the doubled value in [0, 1] when t < 0; sf() is more accurate than 1 - cdf().
+p = stats.t.sf(np.abs(t), df=df)
+
+
+print("t = " + str(t))
+print("p = " + str(2*p))        # two-tailed
+### If the two-tailed p-value is below the chosen significance level (e.g. 0.05) we reject the null hypothesis that the two population means are equal. The exact value changes from run to run because the samples are random; one recorded run gave p of about 0.0005, i.e. a statistically significant difference.
+
+
+## Cross-check with the built-in scipy function (equal-variance two-sample t-test)
+t2, p2 = stats.ttest_ind(a,b)
+print("t = " + str(t2))
+print("p = " + str(p2))
-- 
cgit v1.2.3