# 交叉驗證與模型評估

```python
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

def data_from(img_file):
    """Read a scatter-plot image and split its dark pixels into learn/test sets.

    Every pixel darker than 127 is treated as a data point; its column index
    becomes x and its (flipped) row index becomes y.  The first 20% of the
    points form the test set and the remaining 80% the learning set.

    Returns a dict: {'learn': {'x', 'y'}, 'test': {'x', 'y'}}.
    """
    # Bug fix: 'gray' was referenced but never defined (NameError).
    # Load the image as a single-channel grayscale array.
    gray = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)

    idx = np.where(gray < 127)        # indices of the dark ("black") pixels

    data_x = idx[1]                   # column index -> x coordinate
    data_y = -idx[0] + gray.shape[0]  # flip the y axis (image rows grow downward)

    sep = data_x.size // 10 * 2       # 20% of the samples reserved for testing

    return {
        'learn': {'x': data_x[sep:], 'y': data_y[sep:]},  # used for fitting
        'test':  {'x': data_x[:sep], 'y': data_y[:sep]}   # used for evaluation
    }

# Split the image's points into learning and test sets.
samples = data_from('data.jpg')
learn, test = samples['learn'], samples['test']

# Plot both sets with equal axis scaling.
plt.gca().set_aspect(1)
plt.scatter(learn['x'], learn['y'])
plt.scatter(test['x'], test['y'])

# Fit a straight line; scikit-learn's fit wants a 2-D (n_samples, 1) input.
model = LinearRegression().fit(learn['x'].reshape((-1, 1)), learn['y'])

# Draw the regression line across x in [0, 50].
line_x = [0, 50]
line_y = model.predict([[0], [50]])
plt.plot(line_x, line_y)

# Score the held-out points with mean squared error.
predicted = model.predict(test['x'].reshape((-1, 1)))
plt.text(20, 5,
         "MSE: " + str(mean_squared_error(test['y'], predicted)))
plt.show()
```

```python
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

def data_from(img_file):
    """Read a scatter-plot image and split its dark pixels into learn/test sets.

    Every pixel darker than 127 is treated as a data point; its column index
    becomes x and its (flipped) row index becomes y.  The first 20% of the
    points form the test set and the remaining 80% the learning set.

    Returns a dict: {'learn': {'x', 'y'}, 'test': {'x', 'y'}}.
    """
    # Bug fix: 'gray' was referenced but never defined (NameError).
    # Load the image as a single-channel grayscale array.
    gray = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)

    idx = np.where(gray < 127)        # indices of the dark ("black") pixels

    data_x = idx[1]                   # column index -> x coordinate
    data_y = -idx[0] + gray.shape[0]  # flip the y axis (image rows grow downward)

    sep = data_x.size // 10 * 2       # 20% of the samples reserved for testing

    return {
        'learn': {'x': data_x[sep:], 'y': data_y[sep:]},  # used for fitting
        'test':  {'x': data_x[:sep], 'y': data_y[:sep]}   # used for evaluation
    }

# Split the image's points into learning and test sets.
samples = data_from('data.jpg')
learn, test = samples['learn'], samples['test']

# Plot both sets with equal axis scaling.
plt.gca().set_aspect(1)
plt.scatter(learn['x'], learn['y'])
plt.scatter(test['x'], test['y'])

# Expand x into degree-2 polynomial features, then fit a linear model on them.
poly = PolynomialFeatures()
model = LinearRegression()
model.fit(poly.fit_transform(learn['x'].reshape((-1, 1))), learn['y'])

# Draw the fitted curve over x in [0, 50].
curve_x = np.linspace(0, 50, 50)
curve_y = model.predict(poly.fit_transform(curve_x.reshape((-1, 1))))
plt.plot(curve_x, curve_y)

# Score the held-out points with mean squared error.
predicted = model.predict(poly.fit_transform(test['x'].reshape((-1, 1))))
plt.text(20, 5, "MSE: " + str(mean_squared_error(test['y'], predicted)))
plt.show()
```

| 預測＼實際 | 正 | 負 |
| :----: | :----: | :----: |
| 正 | TP（True Positive） | FP（False Positive） |
| 負 | FN（False Negative） | TN（True Negative） |

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

def data_from(csv_file):
    """Load a CSV of samples and split its rows into learn/test sets.

    The first 20% of the rows become the test set and the rest the
    learning set.  Downstream code reads columns as (height, weight,
    label) — TODO confirm against the actual data file.

    Returns a dict: {'learn': rows, 'test': rows}.
    """
    # Bug fix: 'data' was referenced but never defined (NameError) —
    # the file was never loaded.  Comma delimiter assumed for a .csv file.
    data = np.loadtxt(csv_file, delimiter=',')
    sep = data.shape[0] // 10 * 2  # 20% of the rows reserved for testing
    return {
        'learn': data[sep:],
        'test': data[:sep]
    }

def scatter(data, t_marker, f_marker):
    """Plot column 0 vs column 1, split by the class label in column 2.

    Rows labelled 1 are drawn with t_marker, rows labelled 0 with f_marker.
    """
    for value, marker in ((1, t_marker), (0, f_marker)):
        mask = data[:, 2] == value
        plt.scatter(data[mask, 0], data[mask, 1], marker=marker)

samples = data_from('height_weight.csv')

# Learning data: columns 0-1 are the features, column 2 the class label.
learn = samples['learn']
scatter(learn, 'o', 'x')
features = learn[:, 0:2]
labels = learn[:, 2]

# Train a perceptron classifier on (height, weight).
clf = Perceptron()
clf.fit(features, labels)

# Decision line: c0*h + c1*w + b = 0  =>  w = -(c0*h + b) / c1
c0, c1 = clf.coef_[0]
b = clf.intercept_
hs = np.arange(np.min(features[:, 0]), np.max(features[:, 0]))
plt.plot(hs, -(c0 * hs + b) / c1, linestyle='dashed')

# Test data, drawn with distinct markers.
held_out = samples['test']
scatter(held_out, '8', 'X')

# Accuracy on the held-out labels.
predictions = clf.predict(held_out[:, 0:2])
plt.text(135, 145, "Accuracy: " + str(accuracy_score(held_out[:, 2], predictions)))

plt.xlabel('height')
plt.ylabel('weight')
plt.show()
```

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures

def data_from(csv_file):
    """Load a CSV of samples and split its rows into learn/test sets.

    The first 20% of the rows become the test set and the rest the
    learning set.  Downstream code reads columns as (height, weight,
    label) — TODO confirm against the actual data file.

    Returns a dict: {'learn': rows, 'test': rows}.
    """
    # Bug fix: 'data' was referenced but never defined (NameError) —
    # the file was never loaded.  Comma delimiter assumed for a .csv file.
    data = np.loadtxt(csv_file, delimiter=',')
    sep = data.shape[0] // 10 * 2  # 20% of the rows reserved for testing
    return {
        'learn': data[sep:],
        'test': data[:sep]
    }

def scatter(data, t_marker, f_marker):
    """Plot column 0 vs column 1, split by the class label in column 2.

    Rows labelled 1 are drawn with t_marker, rows labelled 0 with f_marker.
    """
    for value, marker in ((1, t_marker), (0, f_marker)):
        mask = data[:, 2] == value
        plt.scatter(data[mask, 0], data[mask, 1], marker=marker)

data = data_from('height_weight.csv')

# Learning data: columns 0-1 are (height, weight), column 2 the class label.
learn_data = data['learn']
scatter(learn_data, 'o', 'x')

learn_height_weight = learn_data[:,0:2]
learn_label = learn_data[:,2]

# Logistic regression on degree-2 polynomial features.
# With two inputs (h, w), PolynomialFeatures produces the columns
# [1, h, w, h^2, h*w, w^2] in that order.
poly = PolynomialFeatures()
feature = poly.fit_transform(learn_height_weight)
lg_reg = LogisticRegression()
lg_reg.fit(feature, learn_label)

# Decision boundary: for each height h, solve the quadratic in w
#   coef[5]*w^2 + (coef[2] + coef[4]*h)*w + (coef[0] + coef[1]*h + coef[3]*h^2) = 0
# NOTE(review): lg_reg.intercept_ is not added to the constant term here —
# verify the drawn boundary matches lg_reg.predict() output.
coef = lg_reg.coef_[0]
height = learn_height_weight[:,0]
h = np.arange(np.min(height), np.max(height))
ycoef0 = [coef[5]] * h.size
ycoef1 = coef[2] + coef[4] * h
ycoef2 = coef[0] + coef[1] * h + coef[3] * (h ** 2)
ycoef = np.dstack((ycoef0, ycoef1, ycoef2))[0]
y = np.apply_along_axis(np.roots, 1, ycoef) # solve each row's quadratic
w = y[:,1] # keep only one root — presumably the positive/in-range branch; np.roots does not guarantee ordering, TODO confirm
plt.plot(h, w, linestyle='dashed')

# Test data, drawn with distinct markers.
test_data = data['test']
test_height_weight = test_data[:,0:2]
scatter(test_data, '8', 'X')

# Transform the test features with the same polynomial expansion, then classify.
pred = lg_reg.predict( poly.fit_transform(test_height_weight))
test = test_data[:,2]

# Report classification accuracy on the held-out labels.
plt.text(135, 145, "Accuracy: " + str(accuracy_score(test, pred)))

plt.xlabel('height')
plt.ylabel('weight')
plt.show()
```