def get_files(dir):
    """
    Collect the de-duplicated base names of all files under a directory tree.

    The base name is the part of a file name before its first '.', so
    ``a.txt`` and ``a.ann`` both contribute ``a``.

    :param dir: root directory, walked recursively
    :return: list of unique base names, in no particular order
    """
    stems = set()
    for _root, _subdirs, filenames in os.walk(dir):
        stems.update(name.split('.')[0] for name in filenames)
    return list(stems)
def read_json(file_path):
    """
    Print every record of a file that stores one JSON document per line.

    Each line is parsed with ``json.loads`` and printed, followed by a
    separator line of plus signs.

    :param file_path: path of the UTF-8 encoded file to read
    :return: None
    """
    separator = '++++++++++++'
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw in handle:
            print(json.loads(raw))
            print(separator)
def save_json_result(dir_path, person_feat, person_index):
    """
    Write one JSON object per line to ``<dir_path>text_person.json``.

    Each line has the keys ``data_id``, ``feat_value`` and
    ``text_LabelIndex``. The output file is overwritten if it exists.

    :param dir_path: prefix the file name is appended to by plain string
        concatenation, so it must already end with a path separator
    :param person_feat: mapping of id -> feature value (JSON-serialisable)
    :param person_index: mapping of id -> label index; must contain every
        key of ``person_feat``
    :raises KeyError: if an id of ``person_feat`` is missing in ``person_index``
    """
    with open(dir_path + 'text_person.json', 'w', encoding='utf-8') as f:
        for pid, feat in person_feat.items():
            # Build a fresh record per id instead of mutating a shared dict.
            record = {
                'data_id': pid,
                'feat_value': feat,
                'text_LabelIndex': person_index[pid],
            }
            json.dump(record, f, ensure_ascii=False)
            # A real newline terminates each record; writing the letter 'n'
            # would glue all records onto one unparseable line.
            f.write('\n')
def make_path(params):
    """
    Make folders for training and evaluation.

    Creates ``params.result_path``, ``params.ckpt_path`` and a ``log``
    folder relative to the current working directory, including missing
    parent directories. Folders that already exist are left untouched.

    :param params: object exposing ``result_path`` and ``ckpt_path`` attributes
    :raises FileExistsError: if one of the paths exists but is not a directory
    """
    for folder in (params.result_path, params.ckpt_path, "log"):
        # exist_ok avoids the race between an isdir() check and makedirs().
        os.makedirs(folder, exist_ok=True)
def writefile(filename, list, save_dir, sep=' '):
    """
    Save processed annotation rows to a text file, one row per line.

    Each item of ``list`` is a sequence of string fields (for example a
    character and its tag) that is joined with ``sep`` and written as one
    line. An item equal to ``'\\n'``, or to the legacy marker ``'n'``, is a
    sentence boundary and is written as an empty line.

    :param filename: name of the file to write
    :param list: rows to save (sequences of strings or boundary markers)
    :param save_dir: directory the file is written into
    :param sep: separator placed between the fields of a row
    :return: None
    """
    save_file = os.path.join(save_dir, filename)
    boundary_markers = ('\n', 'n')
    with open(save_file, 'w', encoding='utf-8') as f:
        for item in list:
            if isinstance(item, str) and item in boundary_markers:
                f.write('\n')
            else:
                # Rows end with a real newline, not the letter 'n'.
                f.write(sep.join(item) + '\n')
# Usage note: files 00 and 81 are merged into train.txt; 82 is used as test.txt.
def concate_file(dir, files, filename, save_path, sep=' '):
    """
    Merge several annotated ``.txt`` files into one output file.

    Every input line is split on a single space into its fields. After each
    row whose first two fields are ``'。'`` and ``'O'``, a sentence boundary
    is inserted. The merged rows are written with ``writefile``.

    :param dir: directory containing the input files
    :param files: base names of the input files, without the ``.txt`` suffix
    :param filename: name of the merged output file
    :param save_path: directory the output file is written into
    :param sep: separator used between fields in the output file
    :return: None
    """
    sentences = []
    for name in files:
        t_file = os.path.join(dir, name + '.txt')
        # The context manager closes each input file deterministically.
        with open(t_file, 'r', encoding='utf-8') as reader:
            for raw in reader:
                # Strip only the line terminator so the last field compares
                # cleanly and no characters inside tokens are removed.
                fields = raw.rstrip('\r\n').split(' ')
                sentences.append(fields)
                # Slicing avoids an IndexError on rows with a single field.
                if fields[:2] == ['。', 'O']:
                    # An empty row joins to an empty string, so the writer
                    # emits just its line terminator as the boundary.
                    sentences.append([])
    writefile(filename, sentences, save_path, sep)