1、A文件夹中有图片和xml文件,图片中的标注不止一个类别,记为aa、bb、cc
通过这个程序,把A文件夹中所有标注为aa目标的截图、bb目标的截图、cc目标的截图都分类放到B文件夹中
import xml.etree.ElementTree as ET
import glob
import os
from PIL import Image
# Directory that main() scans for annotation files (*.xml).
PATH_TO_ANNOTATIONS = r'C:/Users/Administrator/Desktop/A'
# Directory holding the images the annotations refer to (here the same folder).
PATH_TO_IMAGES = r'C:/Users/Administrator/Desktop/A'
# Output root; main() writes each crop into a sub-folder named after its label.
PATH_TO_SAVE = r'C:/Users/Administrator/Desktop/B'
def _read_boxes(annotation):
    """Return ``(label, (left, upper, right, lower))`` for every <object> element.

    Coordinates are normalised so that left <= right and upper <= lower, which
    is the order ``Image.crop`` expects even if the annotation swapped them.
    """
    boxes = []
    for obj in annotation.findall('object'):
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        # Go through float() first so coordinates written as "12.0" still parse.
        xmin, ymin, xmax, ymax = (
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        boxes.append((
            label,
            (min(xmin, xmax), min(ymin, ymax), max(xmin, xmax), max(ymin, ymax)),
        ))
    return boxes


def main(annotations_dir=None, images_dir=None, save_dir=None):
    """Crop every annotated object out of its image and sort crops by label.

    For each ``*.xml`` file in ``annotations_dir`` the image with the same
    base name and a ``.jpg`` extension is loaded from ``images_dir``; every
    ``<object>`` bounding box is cropped and written to
    ``<save_dir>/<label>/<base name>_<object index>.jpg``.

    Args:
        annotations_dir: Folder with the XML files. Defaults to
            ``PATH_TO_ANNOTATIONS``.
        images_dir: Folder with the images. Defaults to ``PATH_TO_IMAGES``.
        save_dir: Output root. Defaults to ``PATH_TO_SAVE``.

    Returns:
        The number of crops written.
    """
    if annotations_dir is None:
        annotations_dir = PATH_TO_ANNOTATIONS
    if images_dir is None:
        images_dir = PATH_TO_IMAGES
    if save_dir is None:
        save_dir = PATH_TO_SAVE

    xml_files = glob.glob(os.path.join(annotations_dir, "*.xml"))
    saved = 0
    for index, xml_file in enumerate(xml_files, start=1):
        print("Progress:%d/%d" % (index, len(xml_files)))
        print(xml_file)

        # Parse the XML annotation.
        boxes = _read_boxes(ET.parse(xml_file).getroot())
        if not boxes:
            # Nothing to crop, so the image does not need to be opened at all.
            continue

        # Derive the base name with os.path so it works with either path
        # separator and keeps any dots that are part of the file name.
        stem = os.path.splitext(os.path.basename(xml_file))[0]
        image_path = os.path.join(images_dir, stem + '.jpg')

        # Open the image once per annotation file and close it when done.
        with Image.open(image_path) as img:
            for obj_num, (label, box) in enumerate(boxes):
                class_dir = os.path.join(save_dir, label)
                os.makedirs(class_dir, exist_ok=True)
                crop_file = os.path.join(class_dir, '%s_%d.jpg' % (stem, obj_num))
                img.crop(box).save(crop_file)
                saved += 1
    return saved


if __name__ == '__main__':
    main()
2、A文件夹中有图片和xml文件,有些图片中没有目标,就不用标注,因此就没有xml文件。把A文件夹中有xml的图片以及对应的xml文件提取出来,复制到B文件夹中
import os
import shutil
import xml.dom.minidom

from PIL import Image
# Copy every annotated image (an image that has a matching .xml file) and its
# annotation from src_file_path into dst_file_path.
src_file_path = "C:/Users/Administrator/Desktop/1113/A"
dst_file_path = "C:/Users/Administrator/Desktop/1113/B"

# Make sure the destination exists so the first copy does not fail.
os.makedirs(dst_file_path, exist_ok=True)

dir_list = os.listdir(src_file_path)
for file in dir_list:
    if not file.endswith('.xml'):
        continue
    str_ = os.path.splitext(file)[0]
    print(str_)

    img_src_path = os.path.join(src_file_path, str_ + '.jpg')
    if not os.path.isfile(img_src_path):
        # An annotation without its image should not abort the whole batch.
        print('skip %s: image not found: %s' % (file, img_src_path))
        continue

    # Copy the bytes verbatim. Opening and re-saving through PIL would
    # re-encode the JPEG (lossy) and drop its metadata.
    img_save_path = os.path.join(dst_file_path, str_ + '.jpg')
    shutil.copy2(img_src_path, img_save_path)

    # Likewise copy the XML as-is. Re-serialising it through a text file
    # opened with the platform default encoding can corrupt non-ASCII content.
    xml_src_path = os.path.join(src_file_path, file)
    xml_save_path = os.path.join(dst_file_path, file)
    shutil.copy2(xml_src_path, xml_save_path)



