我们可以使用 scipy.spatial.distance.cdist(或多重 for 循环)计算各路线之间的距离矩阵,
然后把每一行中的最小距离替换为对应的路线名,从而找出距离最近的路线,即:
# Pairwise distance matrix between all routes. Note that this is the plain
# Euclidean distance on the (latitude, longitude) values themselves, as
# selected by metric='euclidean'.
mat = scipy.spatial.distance.cdist(
    df[['latitude_value', 'longitude_value']],
    df[['latitude_value', 'longitude_value']],
    metric='euclidean',
)

# If you don't want scipy, you can use plain Python like:
# import math
# mat = []
# for i, j in zip(df['latitude_value'], df['longitude_value']):
#     k = []
#     for l, m in zip(df['latitude_value'], df['longitude_value']):
#         k.append(math.hypot(i - l, j - m))
#     mat.append(k)
# mat = np.array(mat)

# Label both axes with the route ids so a cell reads as "distance from the
# row's route to the column's route".
new_df = pd.DataFrame(mat, index=df['routeId'], columns=df['routeId'])
输出
new_df
# routeId        r1        r2        r3        r4        r5        r6        r7
# routeId
# r1       0.000000  0.316529  0.056505  0.117266  0.309875  0.309875  0.309875
# r2       0.316529  0.000000  0.349826  0.333829  0.007998  0.007998  0.007998
# r3       0.056505  0.349826  0.000000  0.077188  0.343845  0.343845  0.343845
# r4       0.117266  0.333829  0.077188  0.000000  0.329176  0.329176  0.329176
# r5       0.309875  0.007998  0.343845  0.329176  0.000000  0.000000  0.000000
# r6       0.309875  0.007998  0.343845  0.329176  0.000000  0.000000  0.000000
# r7       0.309875  0.007998  0.343845  0.329176  0.000000  0.000000  0.000000

# Replace the minimum distance with the column name, and everything that is
# not the minimum with `False`.
# new_df[new_df != 0] masks out the zero distances (the diagonal and any
# routes with identical coordinates), so .min() is the smallest non-zero
# distance per column. Comparing with axis=0 aligns those minima on the row
# labels; because the distance matrix is symmetric, the column minimum for a
# route is also its row minimum.
closest = np.where(
    new_df.eq(new_df[new_df != 0].min(), axis=0),
    new_df.columns,
    False,
)

# Remove the `False` entries from each row and keep the column names as a list.
df['close'] = [i[i.astype(bool)].tolist() for i in closest]

# Resulting df:
#   routeId  latitude_value  longitude_value         close
# 0      r1       28.210216        22.813209          [r3]
# 1      r2       28.216103        22.496735  [r5, r6, r7]
# 2      r3       28.161786        22.842318          [r1]
# 3      r4       28.093110        22.807081          [r3]
# 4      r5       28.220370        22.503500          [r2]
# 5      r6       28.220370        22.503500          [r2]
# 6      r7       28.220370        22.503500          [r2]
如果您不想忽略零距离(即坐标完全相同的其他路线也应算作最近的路线),那么:
# Work on an independent float copy of the distance values, so that writing
# NaN below neither modifies new_df nor fails on a read-only array.
arr = new_df.to_numpy(dtype=float, copy=True)

# A route must not be reported as closest to itself, so blank out the
# diagonal with NaN before looking for the minimum.
np.fill_diagonal(arr, np.nan)

# Per-row minimum ignoring NaN, kept as a column vector so it broadcasts
# against every row of arr.
row_min = np.nanmin(arr, axis=1)[:, None]

# Replace each row's minimum with the column name and everything else with
# `False`. Zero distances are kept, so routes with identical coordinates
# count as each other's closest route.
new_close = np.where(arr == row_min, new_df.columns, False)

# Get the column names, ignoring the `False` entries.
df['close'] = [i[i.astype(bool)].tolist() for i in new_close]

# Resulting df:
#   routeId  latitude_value  longitude_value         close
# 0      r1       28.210216        22.813209          [r3]
# 1      r2       28.216103        22.496735  [r5, r6, r7]
# 2      r3       28.161786        22.842318          [r1]
# 3      r4       28.093110        22.807081          [r3]
# 4      r5       28.220370        22.503500      [r6, r7]
# 5      r6       28.220370        22.503500      [r5, r7]
# 6      r7       28.220370        22.503500      [r5, r6]



