1. 安装必要的包:
pip install pdfkit
pip install wkhtmltopdf
apt-get install wkhtmltopdf
2. 关闭display配置:
vim ~/.bashrc    # 在文件末尾添加下面这一行
export QT_QPA_PLATFORM='offscreen'
source ~/.bashrc
3. 代码(需要注意html编码方式):
# -*- coding: utf-8 -*-
# @Time : 2021/9/26 10:49
# @Author : lijun
import pdfkit
import os
import re
def rectify_charset(html, charset='utf-8'):
    """Return *html* with a ``<meta>`` tag that declares *charset*.

    Three cases are handled, in order:

    1. The document already has a ``<meta ... charset=...>`` declaration
       (either ``<meta charset="x">`` or the ``http-equiv`` /
       ``content="text/html; charset=x"`` form): the declared value is
       rewritten to *charset*.
    2. The document has a ``<head>`` tag but no charset declaration: a
       ``<meta charset>`` tag is inserted right after the opening tag.
    3. The document has no ``<head>`` at all: a minimal ``<head>`` holding
       the ``<meta charset>`` tag is prepended.

    Args:
        html: HTML document text.
        charset: Encoding name to declare.

    Returns:
        The HTML text with the charset declaration in place.
    """
    meta_tag = '<meta charset="{}">'.format(charset)

    # Case 1: rewrite the value of an existing declaration.  A callable
    # replacement is used so *charset* is never parsed as a regex template.
    rewritten, count = re.subn(
        r'(<meta\b[^>]*?\bcharset\s*=\s*["\']?)[\w.:-]+',
        lambda m: m.group(1) + charset,
        html,
        flags=re.IGNORECASE,
    )
    if count:
        return rewritten

    # Case 2: insert after the first opening <head ...> tag.  The result
    # must be returned -- re.sub does not modify its input in place.
    rewritten, count = re.subn(
        r'(<head\b[^>]*>)',
        lambda m: m.group(1) + '\n' + meta_tag + '\n',
        html,
        count=1,
        flags=re.IGNORECASE,
    )
    if count:
        return rewritten

    # Case 3: no <head> present, so supply one.
    return '<head>\n' + meta_tag + '\n</head>\n' + html
def main():
    """Convert every HTML file in ``./test_data/my_html_0/`` to a PDF.

    Each file is read as UTF-8, passed through ``rectify_charset`` so the
    markup declares UTF-8, and rendered with ``pdfkit.from_string`` into
    ``./test_out/my_html_0/<name-without-extension>.pdf``.  The output
    directory is created if it does not exist.  A running count is printed
    after each converted file.
    """
    options = {
        'page-size': 'Letter',
        'margin-top': '0.75in',
        'margin-right': '0.75in',
        'margin-bottom': '0.75in',
        'margin-left': '0.75in',
        'encoding': "utf-8",
        'custom-header': [('Accept-Encoding', 'gzip')],
        # NOTE(review): these look like placeholder cookies -- confirm or drop.
        'cookie': [
            ('cookie-name1', 'cookie-value1'),
            ('cookie-name2', 'cookie-value2'),
        ],
        # None marks a value-less command-line flag in pdfkit's options.
        'no-outline': None,
    }
    base_path = './'
    in_dir = os.path.join(base_path, 'test_data', 'my_html_0')
    out_dir = os.path.join(base_path, 'test_out', 'my_html_0')
    # The PDF cannot be written if the target directory is missing.
    os.makedirs(out_dir, exist_ok=True)

    processed = 0
    for html_name in os.listdir(in_dir):
        in_file = os.path.join(in_dir, html_name)
        if not os.path.isfile(in_file):
            # os.listdir also returns sub-directories; open() would fail.
            continue

        # Context manager so the handle is closed after every file.
        with open(in_file, 'r', encoding='utf-8') as fh:
            html = fh.read()
        html = rectify_charset(html, 'utf-8')

        # Drop only the last extension: "a.b.html" -> "a.b.pdf".
        stem = os.path.splitext(html_name)[0]
        pdfkit.from_string(
            html,
            os.path.join(out_dir, stem + '.pdf'),
            options=options)

        processed += 1
        print('current processed num: ', processed)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
4. 相关链接:
安装wkhtmltopdf:Installing wkhtmltopdf · JazzCore/python-pdfkit Wiki · GitHub (https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf)



