"""Load the Melbourne housing snapshot and build the feature/target tables.

The original file had every statement collapsed onto a single line, so the
first ``#`` turned the whole script into one comment and nothing but the
pandas import ever ran. The statements are restored here, one per line.
"""
import pandas as pd

# Path of the CSV file to read.
melbourne_file_path = '../input/melbourne-housing-snapshot/melb_data.csv'

# Columns extracted together as the independent variables. The spellings
# ('Lattitude', 'Longtitude') are kept exactly as the script wrote them.
melbourne_features = ['Rooms', 'Bathroom', 'Landsize', 'Lattitude', 'Longtitude']


def prepare_model_inputs(data, feature_columns, target_column='Price'):
    """Drop incomplete rows and split *data* into features and target.

    Args:
        data: DataFrame holding the feature columns and the target column.
        feature_columns: Names of the columns used as independent variables.
        target_column: Name of the column to predict.

    Returns:
        A ``(clean_data, X, y)`` tuple: the rows of *data* that contain no
        missing value, the feature columns of those rows, and the target
        column of those rows. *data* itself is not modified.

    Raises:
        KeyError: If a requested column is not present in *data*.
    """
    # A row is removed entirely as soon as any one of its values is missing.
    clean_data = data.dropna(axis=0)
    # Bracket selection; for a column named like an identifier,
    # attribute access (e.g. clean_data.Price) selects the same column.
    y = clean_data[target_column]
    X = clean_data[feature_columns]
    return clean_data, X, y


# Guarded so that importing this module does not try to read the CSV;
# a notebook or a direct script run executes it and gets the same globals.
if __name__ == '__main__':
    # Read the file.
    melbourne_data = pd.read_csv(melbourne_file_path)

    # Show the column names (to inspect the structure of the data).
    print(melbourne_data.columns)

    # Remove rows with missing values, then extract the target and features.
    melbourne_data, X, y = prepare_model_inputs(melbourne_data, melbourne_features)

    # Summarise every feature column (count, mean, quartiles, ...).
    print(X.describe())

    # Look at the first few rows.
    print(X.head())



