1. 下载并安装 Python 3.6.0 版本
2. 安装依赖库:
pip install pdfminer3k
pip install pyocr
3. 直接把需要转换的 PDF 文件放入 py 程序所在目录即可使用。
源代码:
import pyocr
import importlib
import sys,io
import time
# NOTE(review): reloading ``sys`` looks like a Python 2-era leftover; it is
# kept so module start-up behaves exactly as before -- confirm whether it is
# still needed on Python 3 before removing it.
importlib.reload(sys)

# Wall-clock timestamp (seconds since the epoch) taken at script start-up.
time1 = time.time()

# The original used typographic quotes here, which is a SyntaxError; the
# message text itself (including the full-width colon) is unchanged.
print("初始时间为:", time1)
import os.path
from pdfminer.pdfparser import PDFParser,PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal,LAParams
from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
# Re-wrap the standard output byte stream so that everything printed from here
# on is encoded as UTF-8. The original delimited 'utf8' with typographic
# quotes, which is a SyntaxError; ASCII quotes are restored.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# File name of the PDF to process. Both assignments from the original are
# kept; the second one overrides the first, so ``text_path`` ends up as
# 'photo-words.pdf'. The original delimited these literals with typographic
# quotes, which is a SyntaxError; ASCII quotes are restored.
text_path = r'words-words.pdf'
text_path = r'photo-words.pdf'
def parse():
#’’‘解析PDF文本,并保存到TXT文件中’’’
fp = open(‘words-words.pdf’,mode