빅데이터분석기사/코드

[빅데이터분석기사] 파이썬 머신러닝(ML) 기본 틀 맛보기

EveningPrimrose 2021. 12. 3. 01:14
반응형

일단 아래의 머신러닝 전 과정을 한번 훑고, 세부적으로 공부할 것. ①~④는 하나를 골라 쓰는 모델 예시이며, 맨 아래의 평가 코드(오차행렬, 분류 리포트, predict_proba)는 분류 모델(①, ③)을 학습한 직후에만 실행할 수 있다.

 


---분석데이터 검토---
import pandas as pd

# Load the CSV into a DataFrame. '데이터명' is presumably a placeholder for the
# real file path -- replace it before running.
data = pd.read_csv('데이터명', encoding='utf-8')

# Preview the first rows (only displayed when run interactively, e.g. as the
# last expression of a notebook cell).
data.head()

# DataFrame.shape is an attribute holding (n_rows, n_columns), not a method;
# writing data.shape() raises TypeError because a tuple is not callable.
print(data.shape)

---특성(X)과 레이블(y) 나누기---
# Feature matrix: columns 'a', 'b' and 'c', selected as a DataFrame.
X1 = data.loc[:, ['a', 'b', 'c']]
# Label: column 'z', selected with a one-element list so it stays a DataFrame.
y = data.loc[:, ['z']]

---train, test 데이터셋 나누기---
from sklearn.model_selection import train_test_split

# Split features and label into train/test parts using the default split size.
# stratify=y keeps the label distribution the same in both parts;
# random_state=42 makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X1,
    y,
    stratify=y,
    random_state=42,
)

---정규화(MinMax)---
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature min/max from the training split only and scale it in
# the same call; fit_transform(X) is fit(X) followed by transform(X).
scaler_minmax = MinMaxScaler()
X_scaled_minmax_train = scaler_minmax.fit_transform(X_train)

# Apply the training-split statistics to the test split (no refitting).
X_scaled_minmax_test = scaler_minmax.transform(X_test)

---①모델 적용(로지스틱)---
from sklearn.linear_model import LogisticRegression

# Build and train the classifier in one step; fit() returns the estimator itself.
model = LogisticRegression().fit(X_scaled_minmax_train, y_train)

# Training split: predicted labels, then mean accuracy (the score is not stored).
pred_train = model.predict(X=X_scaled_minmax_train)
model.score(X=X_scaled_minmax_train, y=y_train)

# Test split: predicted labels, then mean accuracy (the score is not stored).
pred_test = model.predict(X=X_scaled_minmax_test)
model.score(X=X_scaled_minmax_test, y=y_test)

 

---②모델 적용(선형회귀)---

from sklearn.linear_model import LinearRegression

# Train a linear regressor on the scaled training features; fit() returns the
# estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_scaled_minmax_train, y_train)

# Predictions for the training split.
pred_train = model.predict(X=X_scaled_minmax_train)

# Training-split score (R^2 for regressors); the value is not stored.
model.score(X=X_scaled_minmax_train, y=y_train)

 

---③모델 적용(랜덤포레스트(분류))---

from sklearn.ensemble import RandomForestClassifier

# Train a random-forest classifier on the scaled training features.
model = RandomForestClassifier()
model.fit(X_scaled_minmax_train, y_train)

# Training split: predicted labels, then mean accuracy (the score is not stored).
pred_train = model.predict(X_scaled_minmax_train)
model.score(X_scaled_minmax_train, y_train)

# Test split: recompute pred_test with THIS model. The evaluation code further
# down the file reads pred_test; without this line it would still hold the
# predictions of whichever model set it last (the logistic model in section ①),
# mixing two different models in the test-set confusion matrix and report.
pred_test = model.predict(X_scaled_minmax_test)
model.score(X_scaled_minmax_test, y_test)

 

---④모델 적용(랜덤포레스트(회귀))---

from sklearn.ensemble import RandomForestRegressor

# Train a random-forest regressor on the scaled training features; fit()
# returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor().fit(X_scaled_minmax_train, y_train)

# Predictions for the training split.
pred_train = model.predict(X=X_scaled_minmax_train)

# Training-split score (R^2 for regressors); the value is not stored.
model.score(X=X_scaled_minmax_train, y=y_train)


from sklearn.metrics import confusion_matrix

# Confusion matrix for the training split (rows: true labels, columns: predictions).
confusion_train = confusion_matrix(y_true=y_train, y_pred=pred_train)
print("훈련데이터 오차행렬: \n", confusion_train)

# Confusion matrix for the test split.
confusion_test = confusion_matrix(y_true=y_test, y_pred=pred_test)
print("테스트데이터 오차행렬: \n", confusion_test)

# NOTE: the function is spelled `classification_report`; a misspelled import
# name (e.g. "calssification_report") fails immediately with ImportError.
from sklearn.metrics import classification_report

# Text report (precision / recall / F1 per class) for the training split.
cfreport_train = classification_report(y_train, pred_train)
print("분류예측 레포트: \n", cfreport_train)

# Test-split report, stored under its own name so it does not overwrite the
# training report (same train/test naming as confusion_train / confusion_test).
cfreport_test = classification_report(y_test, pred_test)
print("분류예측 레포트: \n", cfreport_test)

# NOTE: predict_proba exists only on classifiers, so `model` must be the fitted
# classifier from section ① or ③ at this point. Read top to bottom, the file
# last binds `model` to RandomForestRegressor (section ④), which cannot run
# this part -- execute it right after a classifier section instead.

# Class-membership probabilities for both splits (one column per class).
# The method name is `predict_proba`; prob_test must be computed here as well
# because it is assigned into y_test below.
prob_train = model.predict_proba(X_scaled_minmax_train)
prob_test = model.predict_proba(X_scaled_minmax_test)

# Attach predictions and probabilities to the label frames.
# 'y_pred' uses a plain string key because the predictions are a single 1-D
# column; a list key expects one value column per listed name.
# The two probability column names assume a binary target -- TODO confirm.
y_train['y_pred'] = pred_train
y_train[['y_prob0', 'y_prob1']] = prob_train
y_train
y_test['y_pred'] = pred_test
y_test[['y_prob0', 'y_prob1']] = prob_test
y_test

# Place the (unscaled) test features side by side with the y_test frame,
# aligned column-wise on the row index.
Total_test = pd.concat([X_test, y_test], axis="columns")
Total_test

# Write the combined table to CSV; the row index is written as the first column.
Total_test.to_csv(path_or_buf="classification_test.csv")

반응형