采用:
# Read every CSV under shelldemo/ and stack them into one frame whose value
# columns are labelled by the integer written between parentheses in each
# original header.
files = glob.glob('shelldemo/*.csv')
dfs = []
for fp in files:
    # if multiple columns with no ()
    # df = pd.read_csv(fp, index_col=['S.no','id','number'])
    df = pd.read_csv(fp, index_col=['ID'])
    # Tag the rows with the source file name (text before the first dot) and
    # move it into the index so only value columns remain as columns.
    df['file'] = os.path.basename(fp).split('.')[0]
    df = df.set_index('file', append=True)
    # Capture the digits between literal parentheses. The parentheses and \d
    # must be escaped: unescaped, the pattern has two groups and matches the
    # letter "d", so extract() returns a DataFrame and astype(int) cannot work.
    # NOTE(review): assumes every remaining header contains "(<digits>)";
    # a header without it yields NaN and astype(int) will raise - confirm.
    df.columns = df.columns.str.extract(r'\((\d+)\)', expand=False).astype(int)
    dfs.append(df)

# sort=False keeps the columns in first-seen order instead of sorting them.
df1 = pd.concat(dfs, sort=False).reset_index()
print(df1)
#    ID   file     58   67     89     91     96    100
# 0   1  file1    NaN   56   78.0   98.0    NaN  101.0
# 1   2  file1    NaN   91  100.0  121.0    NaN    NaN
# 2   3  file2  102.0  103    NaN    NaN  101.0  104.0
# 3   4  file2  113.0  117    NaN    NaN  112.0  119.0

# NOTE(review): df2 is not created in this snippet; presumably it is the
# price table loaded elsewhere - confirm before running this on its own.
print(df2)
#     File  Price1  Price2  Price3  Price4
# 0  File1      67      89      91     100
# 1  File2      96      58     105      99
# Lower-case df2's column names and its 'file' values so the merge key
# matches the lower-case 'file' column of df1.
df2.columns = df2.columns.str.lower()
df2['file'] = df2['file'].str.lower()

# merge data together by left join
# (every row of df1 is kept; the price columns come from the matching file)
df = df1.merge(df2, on='file', how='left')
print(df)
#    ID   file     58   67     89     91     96    100  price1  price2  price3  \
# 0   1  file1    NaN   56   78.0   98.0    NaN  101.0      67      89      91
# 1   2  file1    NaN   91  100.0  121.0    NaN    NaN      67      89      91
# 2   3  file2  102.0  103    NaN    NaN  101.0  104.0      96      58     105
# 3   4  file2  113.0  117    NaN    NaN  112.0  119.0      96      58     105
#
#    price4
# 0     100
# 1     100
# 2      99
# 3      99
# Replace each price label in the price* columns with the value stored, on
# the same row, in the column carrying that label.

# filter integers between ()
# .str.isnumeric() gives NaN for labels that are not strings (the integer
# column names), so .isnull() keeps exactly those integer-labelled columns.
df1 = df.loc[:, df.columns.str.isnumeric().isnull()].copy()

# filter all columns with price
df2 = df.filter(regex='price').copy()

# Price labels with no matching column get an empty (all-NaN) column, plus a
# dummy column 'a' that missing prices are redirected to below.
uniq_vals_df2 = df2.stack().dropna().drop_duplicates()
not_matched_vals = np.setdiff1d(uniq_vals_df2, df1.columns)
# Fixed: the class is pd.DataFrame; pd.Dataframe raises AttributeError.
df1 = df1.join(pd.DataFrame(columns=not_matched_vals.tolist() + ['a']))

# replace columns by match values from df2
# NOTE(review): DataFrame.lookup was deprecated in pandas 1.2 and removed in
# 2.0; on newer pandas this loop needs the factorize/reindex replacement
# described in the pandas indexing guide - confirm the target version.
for c in df2.columns:
    df2[c] = df1.lookup(df1.index, df2[c].fillna('a'))

# join to original DataFrame
df = df[['file', 'ID']].join(df2)
print(df)
#     file  ID  price1  price2  price3  price4
# 0  file1   1    56.0    78.0    98.0   101.0
# 1  file1   2    91.0   100.0   121.0     NaN
# 2  file2   3   101.0   102.0     NaN     NaN
# 3  file2   4   112.0   113.0     NaN     NaN



