1. 写代码或者读代码都要有 Top-down 或者 Bottom-up 的思维,先把任务分块。
2. 代码由函数模块构成,理解每一个模块才能让轮子跑起来。
3. 耐心,一行一行地确认每一步都可执行,最终就能得到你想要的结果。比如第一步的路径,我花了1个小时才确认我的Thinkpad电脑用的是“/”路径。
4. 注释很重要,因为哪怕很短的时间你也会忘光光
import csv
import os
import re
def get_gvkey_and_company(fds):
    """Return the first two columns of the lookup row whose fifth column is *fds*.

    Scans ``Factiva_Verified_Unverified.csv`` in the current working
    directory from top to bottom and stops at the first match.

    Args:
        fds: Value compared for equality against column index 4 of each row.

    Returns:
        ``[row[0], row[1]]`` of the first matching row (presumably the gvkey
        and the company name, judging by the function name), or ``None``
        when no row matches.
    """
    lookup_path = os.path.join(os.getcwd(), "Factiva_Verified_Unverified.csv")
    # The context manager closes the file even on the early return below.
    with open(lookup_path, "r", newline="") as f:
        for row in csv.reader(f):
            # Skip blank lines and rows too short to have a column 4.
            if len(row) < 5:
                continue
            if row[4] == fds:
                return [row[0], row[1]]
    return None
def get_fds(path):
    """Extract the ``fds=`` value from the first matching "Text" row of a CSV.

    Args:
        path: Path of the CSV file to scan.

    Returns:
        The text captured between ``fds=`` and `` and rst`` in column 1 of
        the first row whose column 0 is exactly ``"Text"`` and whose column 1
        matches the pattern; ``None`` when no such row exists.
    """
    # The context manager closes the file even on the early return below.
    with open(path, "r", newline="") as f:
        for row in csv.reader(f):
            # Blank rows and rows without a second column cannot match.
            if len(row) < 2 or row[0] != "Text":
                continue
            re_obj = re.search("fds=(.*) and rst.*", row[1])
            if re_obj:
                return re_obj.group(1)
    return None
def get_dates(path):
    """Collect the "Start Date" rows of a CSV file.

    Args:
        path: Path of the CSV file to scan.

    Returns:
        A list of ``[row[0], row[1]]`` pairs, in file order, for every row
        whose column 0 contains the text ``Start Date``. Rows with fewer
        than two columns are skipped. The list is empty when nothing matches.
    """
    result = []
    # The context manager guarantees the file is closed once scanning ends.
    with open(path, "r", newline="") as f:
        for row in csv.reader(f):
            # A usable row needs both the label (col 0) and its value (col 1).
            if len(row) < 2:
                continue
            if re.search("Start Date.*", row[0]):
                result.append([row[0], row[1]])
    return result
# Driver: for every file under file_dir, extract its fds value and its
# "Start Date" rows, look the fds up in the lookup CSV, and append one output
# row per date to final_anyway.csv in the current working directory.
file_dir = "/Users/anthony/PycharmProjects/pythonProject/Factiva/Envrlocal_Code2"
output_path = os.path.join(os.getcwd(), "final_anyway.csv")

for root, dirs, files in os.walk(file_dir, topdown=False):
    for i in files:
        # Build the path from the directory os.walk is currently visiting so
        # that files in sub-directories resolve correctly and the script does
        # not depend on the working directory being file_dir's parent.
        source_path = os.path.join(root, i)
        fds = get_fds(source_path)
        gv_and_company = get_gvkey_and_company(fds)
        dates = get_dates(source_path)
        # Append mode keeps rows from earlier files and runs; the context
        # manager flushes and closes the handle after each input file.
        with open(output_path, 'a', newline='') as f:
            writer = csv.writer(f)
            for j in dates:
                if gv_and_company:
                    writer.writerow([gv_and_company[0], gv_and_company[1], fds, j[0], j[1]])
                    print(gv_and_company[0], gv_and_company[1], fds, j)
                else:
                    # No lookup match: keep the row, leaving the first two
                    # columns empty.
                    writer.writerow(["", "", fds, j[0], j[1]])



