import datetime as DT
import io

import numpy as np
import pandas as pd

# 'warn' is the pandas default; the script sets it explicitly.
pd.options.mode.chained_assignment = 'warn'

# Whitespace-aligned sample table.  Fields are separated by two or more
# spaces so that multi-word job titles (single spaces) stay in one field.
# The header has one fewer field than the data rows, so pandas uses the
# first (unnamed) field of every row as the index.
content = '''\
ssno         lname      fname       pos_title             ser   gender  dob
0    23456789    PLILEY     JODY        BUDG ANAL             0560  F       031871
1    987654321   NOEL       HEATHER     PRTG SRVCS SPECLST    1654  F       120852
2    234567891   SonJU      LAURIE      SUPVY ConTR SPECLST   1102  F       010999
3    345678912   MANNING    CYNTHIA     SOC SCNTST            0101  F       081692
4    456789123   NAUERTZ    ELIZABETH   OFF AUTOMATION ASST   0326  F       031387'''

# A regex separator needs the python parsing engine; naming it explicitly
# avoids the fallback warning.
df = pd.read_csv(io.StringIO(content), sep=r'\s{2,}', engine='python')

# 'dob' is parsed as an integer, which drops the leading zero of the month;
# restore the fixed six-character MMDDYY form before parsing it as a date.
df['dob'] = df['dob'].astype(str).str.zfill(6)

now = pd.Timestamp('now')

# 1. Parse MMDDYY strings into Timestamps.
df['dob'] = pd.to_datetime(df['dob'], format='%m%d%y')

# 2. '%y' maps two-digit years 00-68 to 20xx, so e.g. '52' becomes 2052.
#    Any birth date that lands in the future is moved back one century.
#    DateOffset(years=100) shifts by calendar years, keeping month and day
#    (and the midnight time) intact.
df['dob'] = df['dob'].where(df['dob'] < now,
                            df['dob'] - pd.DateOffset(years=100))

# 3. Age in completed calendar years: the difference in year numbers,
#    minus one when this year's birthday has not happened yet.
birthday_pending = (
    (df['dob'].dt.month > now.month)
    | ((df['dob'].dt.month == now.month) & (df['dob'].dt.day > now.day))
)
df['age'] = now.year - df['dob'].dt.year - birthday_pending.astype(int)

print(df)

# Yields (sample output from the original run follows):
ssno lname fname pos_title ser gender 23456789 PLILEY JODY BUDG ANAL 560 F 1 987654321 NOEL HEATHER PRTG SRVCS SPECLST 1654 F 2 234567891 SonJU LAURIE SUPVY ConTR SPECLST 1102 F 3 345678912 MANNING CYNTHIASOC SCNTST 101 F 4 456789123 NAUERTZ ELIZABETH OFF AUTOMATION ASST 326 F dob age 0 1971-03-18 00:00:00 43 1 1952-12-08 18:00:00 61 2 1999-01-09 00:00:00 15 3 1992-08-16 00:00:00 22 4 1987-03-13 00:00:00 27
- 看来您的 `dob` 列目前是字符串。首先,使用 `pd.to_datetime` 将它们转换为 Timestamp
。 - 该格式
'%m%d%y'
会把最后两位数字解析为年份,但不幸的是,它会把 52 当作 2052 年。由于这不太可能是希瑟·诺埃尔(Heather Noel)的出生年份,因此只要 `dob` 晚于 `now`,就从 `dob` 中减去 100 年。您或许想在条件 `df['dob'] < now` 中把 `now` 往前调几年,
因为出现 101 岁员工的可能性也许比出现 1 岁员工的可能性略高一些。
- 您可以用 `now` 减去 `dob`,得到 timedelta64[ns]。要将其转换为年份,请使用 astype('<m8[Y]') 或 astype('timedelta64[Y]')。(注意:较新版本的 pandas 不再支持转换为以年 'Y' 为单位的 timedelta,此时需要按日历年份来计算年龄。)



