Science/통계학
[통계학] python, sql 로 t-test 구현
pbj0812
2023. 2. 21. 02:32
0. 목표
- python, sql 로 t-test 구현
1. 실습
1) scipy
- Ttest_indResult(statistic=-3.0869745325651587, pvalue=0.031361515666731996)
import numpy as np
import scipy.stats
x = [1, 2, 3, 4, 5]
y = [4, 8, 12, 16, 20]
mean_x = np.mean(x)
mean_y = np.mean(y)
print('x : ', mean_x)
print('y : ', mean_y)
scipy.stats.ttest_ind(x, y, equal_var=False)
2) 그냥 파이썬
- -3.0869745325651587
import numpy as np
import math
x = [1, 2, 3, 4, 5]
y = [4, 8, 12, 16, 20]
a_mean = np.mean(x)
b_mean = np.mean(y)
# 분산
def var(inp):
tmp = []
for i in range(len(inp)):
tmp.append((inp[i] - np.mean(inp))**2)
result = np.sum(tmp) / (len(inp) - 1)
return result
a_var = var(x)
b_var = var(y)
# 통합분산
var_a_b = ((len(x) - 1) * a_var + (len(y) - 1) * b_var) / ((len(x) - 1) + (len(y) - 1))
# 표준오차
se_a_b = np.sqrt(var_a_b * (1 / len(x) + 1 / len(y)))
# t-test 구현
t_test = (a_mean - b_mean) / se_a_b
print(t_test)
3) MySQL
CREATE TABLE sql_study.t_test
(
id int,
x int,
y int
);
INSERT INTO sql_study.t_test(id, x, y) VALUES(1, 1, 4);
INSERT INTO sql_study.t_test(id, x, y) VALUES(2, 2, 8);
INSERT INTO sql_study.t_test(id, x, y) VALUES(3, 3, 12);
INSERT INTO sql_study.t_test(id, x, y) VALUES(4, 4, 16);
INSERT INTO sql_study.t_test(id, x, y) VALUES(5, 5, 20);
SELECT * FROM sql_study.t_test;
# 평균
WITH mean AS (
SELECT
AVG(x) AS mean_x,
AVG(y) AS mean_y,
COUNT(*) AS len
FROM sql_study.t_test
),
# 분산
var AS (
SELECT
SUM(POW(t.x - m.mean_x, 2)) / (SELECT COUNT(*) - 1 FROM sql_study.t_test) AS x_var,
SUM(POW(t.y - m.mean_y, 2)) / (SELECT COUNT(*) - 1 FROM sql_study.t_test) AS y_var,
(SELECT COUNT(*) - 1 FROM sql_study.t_test) AS df
FROM sql_study.t_test AS t, mean AS m
),
# 통합분산
total_var AS (
SELECT
(df * x_var + df * y_var) / (df + df) AS total_var,
df
FROM var
),
# 표준오차
total_se AS (
SELECT
POW(total_var * (1 / (df + 1) + 1 / (df + 1)), 1/2) AS total_se
FROM total_var
),
# t_test
final AS (
SELECT
(m.mean_x - m.mean_y) / s.total_se AS t_value
FROM total_se AS s, mean AS m
)
SELECT * FROM final;
2. 참고
- 자유도(Degree of Freedom: df)의 의미, n-1
- P Value from T Score Calculator
- T - 검정