目录
1 下载原始图片
2 根据坐标裁剪出人脸子图
1 下载原始图片
编写 Python 脚本下载 PubFig 人脸公开数据集,每个人的人脸图片放到一个以人名命名的单独文件夹中。
import os
import sys
import urllib.request as request
from urllib.request import urlretrieve
import socket
import requests
import cv2
'''
下载Pubfig人脸数据集,下载下来之后根据坐标裁剪人脸,
并且每个人的人脸图片放到单独的一个文件夹中
'''
# Text files listing the image URLs to download.
dev_urls = "dev_urls.txt"
val_urls = "eval_urls.txt"

# Output roots: cropped face images and the raw downloaded originals.
faceDir = "./faceDir"
originDir = "./originDir2"

# Install a process-wide urllib opener that sends a browser-like User-Agent
# header with every request made through urllib.request.
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
headers = ('User-Agent', user_agent)
opener = request.build_opener()
opener.addheaders = [headers]
request.install_opener(opener)

# Default socket timeout (seconds) so an unresponsive host cannot stall the run.
timeout = 3
socket.setdefaulttimeout(timeout)
def getPic(dir, name, nameNum, url, box):
    """Download one image and save it under a name that encodes its face box.

    The image is stored as ``<dir>/<name>_<nameNum>_<x1>_<y1>_<x2>_<y2>.jpg``,
    where the four coordinates are the first four comma-separated values of
    ``box``.

    Args:
        dir: Target directory; created if it does not exist. (The name shadows
            the builtin but is kept so existing callers keep working.)
        name: Person name used as the file-name prefix.
        nameNum: Image number, as a string.
        url: Address of the image to download.
        box: Face rectangle as a comma-separated string, e.g. ``"1,2,3,4"``.

    Returns:
        True if the image was downloaded and saved, False otherwise.
    """
    os.makedirs(dir, exist_ok=True)
    coord = box.split(",")
    print("coord:", coord)
    if len(coord) < 4:
        print("bad box, skipping:", box, url)
        return False

    fileName = os.path.join(dir, "_".join([name, nameNum] + coord[:4]) + ".jpg")
    print("url::", url)
    print("fileName::", fileName)

    # Download to a temporary name first so that a failed or truncated
    # transfer never leaves a broken .jpg behind and never clobbers a file
    # saved by an earlier successful run.
    partName = fileName + ".part"
    try:
        request.urlretrieve(url, partName)
        os.replace(partName, fileName)
    except Exception as e:
        # Deliberately broad: this is the per-image boundary of a bulk
        # download, and one dead link must not abort the whole run.
        print(type(e).__name__, ':', e)
        try:
            if os.path.exists(partName):
                os.remove(partName)
        except OSError:
            pass
        return False
    return True
def downloadPic(txtName):
    """Download every image listed in a URL list file.

    The first two lines of the file are treated as a format header and
    skipped. Each remaining line is expected to look like::

        <name word> [<name word> ...] <imagenum> <url> <x1,y1,x2,y2> <md5>

    The trailing four fields are located by counting from the end of the
    line, so person names of any length are handled. Blank or malformed lines
    are reported and skipped instead of reusing values from a previous line.

    Each image is saved under ``originDir/<person>/`` via ``getPic``.

    Args:
        txtName: Path of the URL list file.
    """
    with open(txtName) as f:
        for lineNo, line in enumerate(f):
            # The first two lines describe the data format; they carry no data.
            if lineNo < 2:
                continue

            fields = line.split()
            if not fields:
                continue
            # Need at least one name word plus imagenum, url, box and md5,
            # and the box must hold exactly four comma-separated values.
            if len(fields) < 5 or fields[-2].count(",") != 3:
                print("skip malformed line::", line.rstrip())
                continue

            # Use the whole name so that different people sharing a first
            # name get separate folders. The words are joined with '-'
            # because '_' is the field separator inside saved file names.
            person = "-".join(fields[:-4])
            nameNum, url, box = fields[-4:-1]
            getPic(originDir + "/" + person, person, nameNum, url, box)
if __name__ == '__main__':
    # Download both URL lists. The evaluation list is bound to the name
    # `val_urls`; no `eval_urls` variable exists, so passing that name would
    # raise NameError after the first list finished.
    downloadPic(dev_urls)
    downloadPic(val_urls)
2 根据坐标裁剪出人脸子图
同样也是每个人的图片放到一个单独的文件夹中,命名方式是人名加number。
import os
import sys
import urllib.request as request
from urllib.request import urlretrieve
import socket
import requests
import cv2
# Crop the face sub-images out of the downloaded originals.
#
# srcDir must be the directory the download script saves into. That script
# writes to "./originDir2", so reading from "./originDir" would find nothing.
srcDir = './originDir2'
# Root directory that receives the cropped faces, one sub-folder per person.
dstDir = './faceDir'
def _parseFaceFileName(fileName):
    """Split '<name>_<num>_<x1>_<y1>_<x2>_<y2>.<ext>' into parts, or return None."""
    stem, ext = os.path.splitext(fileName)
    # Split from the right so the name part may itself contain '_'.
    parts = stem.rsplit('_', 5)
    if len(parts) != 6 or not ext:
        return None
    try:
        box = tuple(int(value) for value in parts[2:])
    except ValueError:
        return None
    return parts[0], parts[1], box, ext


def clip(srcDir, dstDir):
    """Crop the face region out of every image found under ``srcDir``.

    ``srcDir`` is expected to contain one sub-directory per person, holding
    files named ``<name>_<num>_<x1>_<y1>_<x2>_<y2>.<ext>``. Each image is read
    with ``cv2.imread``, cropped to rows ``y1:y2`` and columns ``x1:x2``, and
    written to ``<dstDir>/<name>/<name>_<num>.<ext>``.

    Entries that cannot be processed are reported and skipped: non-directory
    entries in ``srcDir``, unexpected file names, unreadable images and empty
    crops.

    Args:
        srcDir: Root directory of the downloaded originals.
        dstDir: Root directory that receives the cropped faces.

    Returns:
        The number of face images written.
    """
    saved = 0
    for dirName in os.listdir(srcDir):
        personDir = os.path.join(srcDir, dirName)
        # Stray files next to the person folders would make os.listdir raise
        # and abort the whole run.
        if not os.path.isdir(personDir):
            continue

        for fileName in os.listdir(personDir):
            print("filename::", fileName)
            parsed = _parseFaceFileName(fileName)
            if parsed is None:
                print("skip (unexpected file name)::", fileName)
                continue
            personName, number, (x1, y1, x2, y2), ext = parsed

            try:
                srcPath = os.path.join(personDir, fileName)
                originImg = cv2.imread(srcPath)
                # cv2.imread signals failure by returning None, not by raising.
                if originImg is None:
                    print("skip (unreadable image)::", srcPath)
                    continue

                # Clamp to zero: a negative index would wrap around to the
                # far edge of the image and silently give a wrong crop.
                faceImg = originImg[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]
                if faceImg.size == 0:
                    print("skip (empty crop)::", srcPath)
                    continue

                faceDir = os.path.join(dstDir, personName)
                print("faceDir::", faceDir)
                os.makedirs(faceDir, exist_ok=True)
                newName = personName + '_' + number + ext
                if cv2.imwrite(os.path.join(faceDir, newName), faceImg):
                    saved += 1
                else:
                    print("skip (could not write)::", os.path.join(faceDir, newName))
            except Exception as e:
                # Deliberately broad: one bad image must not stop the batch.
                print(type(e).__name__, ':', e)
    return saved
if __name__ == '__main__':
    # Script entry point: crop the images under srcDir into dstDir.
    clip(srcDir=srcDir, dstDir=dstDir)
参考文献:
Pubfig DataBase 下载Python脚本_CodeCold的博客-CSDN博客_pubfig数据集
python爬取人脸识别图片数据集/py - 云+社区 - 腾讯云
https://www.cnblogs.com/darkknightzh/p/5715305.html



