def func(self, mean, var, xi):
    """
    Evaluate a Gaussian (normal) probability density at ``xi``.

    :param mean: centre of the distribution
    :param var: spread of the distribution. The formula squares this value
        in the exponent and divides by it once in the normalising factor,
        i.e. it is used as a standard deviation even though the parameter
        is named ``var``.
    :param xi: attribute value at which to evaluate the density; it is
        converted with ``float()`` first, so numeric strings are accepted
    :return: the density value at ``xi``
    """
    offset = float(xi) - mean
    exponent = -(offset ** 2) / (2 * var ** 2)
    bell = exp(exponent)
    # Normalising factor sqrt(2*pi) * sigma of the Gaussian density.
    scale = sqrt(2 * pi) * var
    return bell / scale
def AODE_predict(self, yangben_data):
    """
    Predict whether the next payment will be overdue.

    For every feature column, the mean and standard deviation are computed
    separately over the rows of ``df`` with ``y == 1`` and with ``y == 0``
    (``df``, ``mean`` and ``stddev`` are resolved from the enclosing module
    scope -- presumably a Spark DataFrame and ``pyspark.sql.functions``;
    confirm against the file's imports). The sample is then scored under an
    independent Gaussian per feature for each class, and the class with the
    larger score wins. No class prior is applied, and ties go to class 1.

    NOTE(review): despite the method name, the visible logic is a Gaussian
    naive-Bayes style comparison, not an averaged one-dependence estimator.

    :param yangben_data: sample to classify, in the order
        [debt ratio, monthly income, number of unpaid loans, family size];
        each value is converted with ``float()``
    :return: ``"下次不逾期"`` when class 0 scores strictly higher,
        otherwise ``"下次逾期"``
    :raises IndexError: if the sample holds more values than there are
        feature columns
    :raises ZeroDivisionError: if a per-class standard deviation is 0
    """
    import math

    feature_names = ['DebtRatio', 'MonthlyIncome', 'num_late_card', 'families']

    # The class filters do not depend on the feature, so build them once.
    overdue_rows = df.filter(df['y'] == 1)
    on_time_rows = df.filter(df['y'] == 0)

    stats_1 = []  # (mean, stddev) per feature for y == 1
    stats_0 = []  # (mean, stddev) per feature for y == 0
    for name in feature_names:
        aggregates = (mean(name).alias('mean'), stddev(name).alias('stddev'))
        row_1 = overdue_rows.select(*aggregates).collect()[0]
        row_0 = on_time_rows.select(*aggregates).collect()[0]
        stats_1.append((row_1["mean"], row_1["stddev"]))
        stats_0.append((row_0["mean"], row_0["stddev"]))
        print(name)
        print("1", row_1["mean"], row_1["stddev"])
        print("0", row_0["mean"], row_0["stddev"])

    def log_density(mu, sigma, value):
        # Logarithm of the Gaussian density, evaluated directly. Going
        # through exp() first underflows to 0.0 for values far from the
        # class mean, which made both class scores 0 and the decision
        # arbitrary.
        return (-((float(value) - mu) ** 2) / (2 * sigma ** 2)
                - math.log(sigma)
                - 0.5 * math.log(2 * math.pi))

    # Sum log-densities instead of multiplying densities: same ordering of
    # the two classes, but no floating-point underflow.
    log_score_1 = 0.0
    log_score_0 = 0.0
    for i, value in enumerate(yangben_data):
        log_score_1 += log_density(stats_1[i][0], stats_1[i][1], value)
        log_score_0 += log_density(stats_0[i][0], stats_0[i][1], value)

    if log_score_0 > log_score_1:
        print("this is 0")
        return "下次不逾期"
    print("this is 1")
    return "下次逾期"