"""Two-sample Student's t-test computed by hand with NumPy, cross-checked with SciPy.

Recovered from the git patch that created ``stats/t-test.py``
(commit 7cbbd04, author "zhang", 2020-02-29, subject: "t-test implemented
with numpy").

Running the script draws two small Gaussian samples, computes the pooled
(equal-variance) t statistic and its two-tailed p-value, prints them, and then
prints the values returned by ``scipy.stats.ttest_ind`` for comparison.
"""

## Import the packages
import numpy as np
from scipy import stats


def one_tailed_p(t_stat, dof):
    """Return the tail probability P(T >= |t_stat|) of a Student's t variable.

    Parameters:
        t_stat: observed t statistic (scalar or array-like); its sign is ignored.
        dof: degrees of freedom of the t distribution.

    Returns:
        The one-sided tail probability. Doubling it gives the two-tailed
        p-value, which therefore always lies in [0, 1].
    """
    # Use |t| so a negative statistic (mean(a) < mean(b)) is handled correctly,
    # and the survival function rather than 1 - cdf so that very small tail
    # probabilities are not lost to floating-point cancellation.
    return stats.t.sf(np.abs(t_stat), df=dof)


## Define 2 random distributions
# Sample size
N = 10
# Gaussian distributed data with mean = 2 and var = 1
a = np.random.randn(N) + 2
# Gaussian distributed data with mean = 0 and var = 1
b = np.random.randn(N)


## Calculate the standard deviation
# Calculate the variances first, then pool them to get the standard deviation.

# The unbiased variance estimate divides by N-1 (Bessel's correction), hence
# ddof=1. (The maximum-likelihood estimate would divide by N, i.e. ddof=0.)
var_a = a.var(ddof=1)
var_b = b.var(ddof=1)

# Pooled standard deviation; this simple average is valid because both
# samples have the same size N.
s = np.sqrt((var_a + var_b) / 2)
print(a)
print(b)


## Calculate the t-statistic
t = (a.mean() - b.mean()) / (s * np.sqrt(2 / N))


## Compare with the critical t-value
# Degrees of freedom
df = 2 * N - 2

# One-sided tail probability of |t|; doubled below for the two-tailed test.
p = one_tailed_p(t, df)


print("t = " + str(t))
print("p = " + str(2 * p))  # two-tailed
### If the two-tailed p-value is below the chosen significance level (e.g.
### 0.05) we reject the null hypothesis that the two distributions have the
### same mean. The samples are drawn from an unseeded random generator, so the
### exact t and p values differ from run to run; with true means 2 and 0 and
### unit variance the p-value is usually well below 0.05.


## Cross-check with SciPy's built-in function
# ttest_ind defaults to the same pooled, equal-variance, two-sided test, so
# t2 and p2 should match t and 2*p up to floating-point rounding.
t2, p2 = stats.ttest_ind(a, b)
print("t = " + str(t2))
print("p = " + str(p2))