栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

Kaggle: Titanic with Logistic Regression

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

Kaggle: Titanic with Logistic Regression

不定时更新~

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  9 19:29:17 2021

@author: Lenovo
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import *

# Load the two Titanic CSV files from the current working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
print('-----------------------DATA_PROCESSING-----------------------')

# Stack both sets so the imputation and one-hot encoding below are applied
# uniformly. Rows without a 'Survived' value (expected to be the test.csv
# rows) are the ones to predict. ignore_index=True gives every row a unique
# label; without it the row labels of the two files would collide.
data = pd.concat((train_data, test_data), axis=0, ignore_index=True)
# Remember the ids of the unlabeled rows for the submission file before the
# identifier column is dropped.
X_id = data.loc[data['Survived'].isnull(), 'PassengerId']
data.drop(['PassengerId', 'Name', 'Ticket'], axis=1, inplace=True)
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only added a stray "None" line to the output.
data.info()

# Cabin handling: missing cabins become the placeholder 'Unknown', and the
# first character of the cabin string is stored as the 'Deck' feature
# (so rows without a cabin get deck 'U').
cabin = data['Cabin'].where(data['Cabin'].notnull(), 'Unknown')
data['Cabin'] = cabin
data['Deck'] = cabin.str[0]

# Fare handling: show the rows whose Fare is missing, then fill them with the
# median fare. Leaving the NaN in place would propagate it into the feature
# matrix, and LogisticRegression refuses NaN input. Dropping the rows instead
# is not an option when they are unlabeled rows that still need a prediction.
print(data[data['Fare'].isnull()])
data['Fare'] = data['Fare'].fillna(data['Fare'].median())

# Age handling: fit a random forest on the rows with a known Age, using
# Pclass and the one-hot encoded Sex as predictors, and use it to fill in the
# missing ages.
from sklearn.ensemble import RandomForestRegressor
age_df = data[['Age', 'Pclass', 'Sex']]
age_df = pd.get_dummies(age_df)
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() (pandas >= 0.24)
# is its replacement. dtype=float keeps the array homogeneous even when
# get_dummies() produces boolean indicator columns.
known_age = age_df[age_df.Age.notnull()].to_numpy(dtype=float)
unknown_age = age_df[age_df.Age.isnull()].to_numpy(dtype=float)
# Column 0 is Age (the regression target); the remaining columns are features.
y = known_age[:, 0]
X = known_age[:, 1:]
rfr = RandomForestRegressor(random_state=0, n_estimators=100, n_jobs=-1)
rfr.fit(X, y)
# predict() rejects an empty sample array, so only impute when needed.
if len(unknown_age):
    predictedAges = rfr.predict(unknown_age[:, 1:])
    data.loc[data.Age.isnull(), 'Age'] = predictedAges
# Embarked handling: rows without a port of embarkation are assigned 'C'.
embarked_missing = data['Embarked'].isnull()
data.loc[embarked_missing, 'Embarked'] = 'C'
print('-----------------------TRAIN_TEST_SPLIT-----------------------')

# Keep only the modelling columns and one-hot encode the categorical ones
# (Sex, Embarked, Deck).
data = data[['Survived', 'Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Deck']]
data = pd.get_dummies(data)
# Rows with a 'Survived' value form the training set; the rest are the rows
# to predict.
labeled = data['Survived'].notnull()
train = data[labeled]
# DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy() instead.
# Features and label are selected by column name rather than by position, so
# the split does not depend on 'Survived' being the first column.
X_train = train.drop('Survived', axis=1).to_numpy(dtype=float)
y_train = train['Survived'].to_numpy()
X_test = data[~labeled].drop('Survived', axis=1)
print(X_test)
print('-------------------------MODEL_GENERATING---------------------')
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression with default settings on the training matrix.
log_reg = LogisticRegression().fit(X_train, y_train)

# Report the mean accuracy on the training data itself, framed by separators.
train_accuracy = log_reg.score(X_train, y_train)
print('------------------------------------')
print(train_accuracy)
print('------------------------------------')

# Predict the label for every unlabeled row.
y_pred = log_reg.predict(X_test)


print('----------------------OUTPUT_FILE_GENERATING------------------')
import csv

# Write the submission file: a header row followed by one
# (PassengerId, Survived) row per prediction.
# NOTE(review): the path below looks like a Windows path whose backslashes
# were lost; confirm the intended location before running.
# The with-block guarantees the file is closed even if writing fails.
with open(r'C:UsersLenovoDesktopkaggleTitanicsubmission.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["PassengerId", "Survived"])
    # The label column contains NaN for the unlabeled rows, so the labels (and
    # therefore the predictions) are floats; cast to int so the file holds
    # 0/1 rather than 0.0/1.0 -- presumably what the competition expects.
    csv_writer.writerows(
        (passenger_id, int(survived))
        for passenger_id, survived in zip(X_id, y_pred)
    )
print('-------------------------WORD_DONE---------------------')

使用多项式特征(PolynomialFeatures)的方式:

# Alternative model section: logistic regression on polynomial features.
print('-------------------------MODEL_GENERATING---------------------')
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def PolynomialLogisticRegression(degree):
    """Build an unfitted polynomial logistic-regression pipeline.

    The pipeline expands the inputs with ``PolynomialFeatures`` of the given
    ``degree``, standardizes them with ``StandardScaler`` and classifies with
    a default ``LogisticRegression``. The steps are named ``'poly'``,
    ``'std_scaler'`` and ``'log_reg'``.
    """
    steps = [
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('log_reg', LogisticRegression()),
    ]
    return Pipeline(steps)
# Build the degree-3 polynomial pipeline and fit it on the training matrix
# (Pipeline.fit returns the fitted pipeline itself).
poly_log_reg = PolynomialLogisticRegression(degree=3).fit(X_train, y_train)
转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/468178.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号