import pandas as pd
import numpy as np
def edit_distance(word1, word2):
    """Return the Levenshtein edit distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``word1`` into ``word2``.
    Elements are compared with ``!=``/``==``, so for ``str`` arguments one
    character (e.g. one Chinese character) counts as one edit.  The script
    uses this to find the closest-matching string.

    Args:
        word1: First sequence; must support ``len()`` and iteration.
        word2: Second sequence, same requirements.

    Returns:
        The distance as a plain ``int``; 0 when both sequences are equal,
        and the length of the other sequence when one of them is empty.
    """
    # The distance is symmetric, so put the shorter sequence on the inner
    # axis: only two rows of that length are ever kept in memory.
    if len(word1) < len(word2):
        word1, word2 = word2, word1

    # previous[j] is the distance between the processed prefix of word1 and
    # the first j elements of word2; the initial row is the empty prefix.
    previous = list(range(len(word2) + 1))
    for i, item1 in enumerate(word1, start=1):
        current = [i]  # turning i elements into an empty prefix = i deletions
        for j, item2 in enumerate(word2, start=1):
            substitute = previous[j - 1] + (1 if item1 != item2 else 0)
            delete = previous[j] + 1
            insert = current[j - 1] + 1
            current.append(min(substitute, delete, insert))
        previous = current
    return previous[-1]
# Fuzzy-join script: for every row of the CSV table, find the row of the Excel
# table whose '属性3' string is closest by edit distance and, if they differ by
# at most MAX_DISTANCE edits, copy the CSV row's '属性1' / '属性2' into it.
# The updated Excel table is written to 'out4.xlsx'.
MAX_DISTANCE = 1  # largest edit distance still accepted as "the same" string

a = pd.read_csv(r'表格路径')
b = pd.read_excel(r'表格路径')
A = a['属性3'].tolist()
print(A)
Azl = a['属性1']
Arj = a['属性2']
B = b['属性3'].tolist()

# Row of the first occurrence of each distinct string in B, built once so the
# matched row is resolved in O(1) instead of scanning B for every match.
first_row_in_B = {}
for b_row, candidate in enumerate(B):
    first_row_in_B.setdefault(candidate, b_row)

# enumerate supplies the row of the current value directly; looking the row up
# by value would always return the first duplicate's row when A repeats a string.
for a_row, i in enumerate(A):
    best_distance = None
    best_candidate = None
    for j in B:
        distance = edit_distance(i, j)
        # "<=" keeps the last candidate among equally close ones.
        if best_distance is None or distance <= best_distance:
            best_distance = distance
            best_candidate = j
        if distance == 0:
            break  # exact match: nothing can be closer

    # Skip when B is empty or the closest string is still too different.
    if best_distance is None or best_distance > MAX_DISTANCE:
        continue

    print([i, best_candidate])
    Bindex = first_row_in_B[best_candidate]
    b.loc[Bindex, '属性1'] = Azl[a_row]
    b.loc[Bindex, '属性2'] = Arj[a_row]

b.to_excel('out4.xlsx', sheet_name='sheetname', index=False)