下面的代码会按您需要的两列格式,返回每个父级 ID 及其所有子级(含各级后代)ID:
# Expand a parent/child edge table into a two-column frame that pairs every
# parent_id with each of its descendants (children, grandchildren, ...).
import itertools

import numpy as np
import pandas as pd

# Edge list of the hierarchy: each row says "child_id is a direct child of
# parent_id".
df = pd.DataFrame(
    {
        'parent_id': [
            3111, 2010, 3000, 1000, 4023, 3011, 3033, 5010, 3011, 3102,
            2010, 4023, 2110, 2100, 1000, 5010, 2110, 1000, 5010, 3033,
        ],
        'child_id': [
            4321, 3102, 4023, 2010, 5321, 4200, 4113, 6525, 4010, 4001,
            3011, 5010, 3000, 3033, 2110, 6100, 3111, 2100, 6016, 4311,
        ],
    }
)


def get_child_list(df, parent_id):
    """Return every descendant of *parent_id* found in the edge table *df*.

    Args:
        df: DataFrame with a 'parent_id' and a 'child_id' column, one row
            per parent -> child edge.
        parent_id: The node whose descendants are wanted.

    Returns:
        A flat list: the direct children of ``parent_id`` in row order,
        followed by the descendant list of each of those children in turn.
        An id with no children yields ``[]``. A node reachable through
        several paths is listed once per path.

    Raises:
        KeyError: If ``df`` lacks the 'parent_id' or 'child_id' column.
    """
    # Build the adjacency mapping once instead of re-filtering the whole
    # frame with a boolean mask for every node visited.
    children_of = {}
    for parent, child in zip(df['parent_id'].tolist(), df['child_id'].tolist()):
        children_of.setdefault(parent, []).append(child)

    def _descendants(node, path):
        """Collect descendants of *node*; *path* holds node and its ancestors."""
        direct = children_of.get(node, [])
        collected = list(direct)
        for child in direct:
            # A child already on the current path means the table contains
            # a cycle (including a row that is its own parent); list it but
            # do not descend again, otherwise the recursion never ends.
            if child in path:
                continue
            collected.extend(_descendants(child, path | {child}))
        return collected

    return _descendants(parent_id, frozenset([parent_id]))


# One group of rows per row of the input table, so a parent that occurs in
# several input rows is expanded once per occurrence. Each group keeps its
# own 0-based index because the groups are concatenated as-is.
frames = []
for parent_id in df['parent_id']:
    frames.append(
        pd.DataFrame(
            {
                'parent_id': parent_id,
                'list_of_children': get_child_list(df, parent_id),
            }
        )
    )

# DataFrame.append was removed in pandas 2.0; concatenate all groups at once.
new_df = pd.concat(frames)
print(new_df)


