<!-- Automatically extract the text content of a PDF as soon as one is chosen in #userPdfInput. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
<script>
// Worker script from the same pdf.js release (2.16.105) as the library loaded above.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfInput = document.getElementById('userPdfInput');

/**
 * Concatenate the text items of one page into a single string.
 *
 * @param {Array<{str: string, hasEOL?: boolean}>} items - `items` array of a
 *   pdf.js text-content object.
 * @returns {string} The page text; a newline is appended after every item that
 *   pdf.js flags with `hasEOL`, so line breaks are not lost.
 */
function joinTextItems(items) {
  // When `hasEOL` is absent the item is appended unchanged, which matches a plain join('').
  return items.map((item) => (item.hasEOL ? `${item.str}\n` : item.str)).join('');
}

/**
 * Extract the text of every page of a PDF document.
 *
 * Each page's text is also logged to the console as it is extracted.
 *
 * @param {ArrayBuffer} data - Raw bytes of the PDF file.
 * @returns {Promise<string>} Text of all pages, in page order, separated by newlines.
 * @throws Rejects with whatever error pdf.js reports when loading or reading fails.
 */
async function extractPdfText(data) {
  const loadingTask = pdfjsLib.getDocument({ data });
  try {
    const pdf = await loadingTask.promise;
    const pageTexts = [];
    // pdf.js page numbers are 1-based.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const textContent = await page.getTextContent();
      const pageText = joinTextItems(textContent.items);
      console.log(pageText);
      pageTexts.push(pageText);
    }
    return pageTexts.join('\n');
  } finally {
    // Release the document and its worker resources whether or not extraction succeeded.
    await loadingTask.destroy();
  }
}

// File-input change event: read the chosen file and log its extracted text.
pdfInput.addEventListener('change', async (event) => {
  const file = event.target.files[0];
  if (!file) {
    return;
  }
  try {
    // Reading inside the try block means file-read failures are reported too.
    const arrayBuffer = await file.arrayBuffer();
    const allText = await extractPdfText(arrayBuffer);
    console.log(allText);
  } catch (error) {
    console.error('提取文本时出错:', error);
  }
});
</script>
利用js提取PDF文件中的文本内容
最新推荐文章于 2025-05-28 13:52:30 发布