# Python script that automatically crops the text-recognition regions out of images annotated for a custom PaddleOCR dataset.
import os
from PIL import Image
import json
# Label file read by this script: one line per image, holding the image
# reference and a JSON list of annotated regions separated by a tab.
LABEL_FILE = "Label.txt"

# Source image directory and destination directory for the crops.
# NOTE(review): these literals are kept exactly as found; they look like
# Windows paths whose separators may have been lost -- confirm before running.
pic_path = r"E:OurDatahandleP"
save_path = r"E:OurDatahandleR"


def _bounding_box(points):
    """Return a ``(left, upper, right, lower)`` crop box for one region.

    ``points`` is the annotation's list of ``[x, y]`` corners. The box is
    built from the first corner (left, upper), the x of the second corner
    (right) and the y of the third corner (lower), each rounded to zero
    decimals.
    NOTE(review): this assumes the corners are ordered top-left, top-right,
    bottom-right and the region is roughly axis-aligned -- confirm.
    """
    return (
        round(points[0][0], 0),
        round(points[0][1], 0),
        round(points[1][0], 0),
        round(points[2][1], 0),
    )


def parse_label_line(line):
    """Parse one line of the label file.

    The line is expected to look like ``<dir>/<image name>\\t<JSON list>``,
    where every JSON element has a ``"points"`` and a ``"transcription"``
    key.

    Returns ``None`` for a blank line, otherwise a tuple
    ``(image_name, regions)`` where ``regions`` is a list of
    ``(box, transcription)`` pairs, one per annotated region.

    Raises ``ValueError`` if a non-blank line has no tab separator.
    """
    line = line.strip()
    if not line:
        return None
    # Split on the TAB character: splitting on the letter "t" would break
    # every path or transcription that happens to contain a "t".
    image_ref, sep, json_info = line.partition("\t")
    if not sep:
        raise ValueError("label line has no tab separator: {!r}".format(line))
    # Only the file name is compared against the files found on disk.
    pic_name = image_ref.split("/")[-1]
    annotations = json.loads(json_info)
    regions = [
        (_bounding_box(entry["points"]), entry["transcription"])
        for entry in annotations
    ]
    return pic_name, regions


def load_labels(label_file):
    """Read ``label_file`` and index its entries by image file name.

    Returns a dict mapping each image name to a list of region lists (one
    region list per line that referenced that image), so a name that occurs
    on several lines is processed once per line.
    """
    labels = {}
    with open(label_file, "r", encoding="utf-8") as f:
        for line in f:
            parsed = parse_label_line(line)
            if parsed is None:
                continue
            pic_name, regions = parsed
            labels.setdefault(pic_name, []).append(regions)
    return labels


def crop_labelled_regions(labels, src_dir, dst_dir):
    """Crop every labelled region of every image found under ``src_dir``.

    Each crop is saved in ``dst_dir`` as
    ``<image stem>_<region index>_<transcription>.jpg``. ``dst_dir`` is
    created if it does not exist.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for dirpath, _dirnames, filenames in os.walk(src_dir):
        for filename in filenames:
            print(filename)
            stem = filename.split(".")[0]
            for regions in labels.get(filename, ()):
                # Open relative to the directory os.walk is visiting so
                # images inside subdirectories are found, and close the
                # file handle as soon as the crops are written.
                with Image.open(os.path.join(dirpath, filename)) as img:
                    for index, (box, transcription) in enumerate(regions):
                        out_name = "{}_{}_{}.jpg".format(
                            stem, index, transcription
                        )
                        img.crop(box).save(os.path.join(dst_dir, out_name))


def main():
    """Crop all regions listed in ``LABEL_FILE`` from the images in ``pic_path``."""
    crop_labelled_regions(load_labels(LABEL_FILE), pic_path, save_path)


if __name__ == "__main__":
    main()



