C# 批量读取 Word 文档,将文字保存为 txt 文件并导出其中的图片

这个 C# 代码示例展示了如何读取 Word 文档中的文字和图片:图片通过 UtilsDocument.GetWordImageSync 方法提取,文本通过 ReadWPSContent 方法(借助剪贴板)读取,并以 txt 文件保存到桌面。选择单个文件时,处理过程由 BackgroundWorker 在后台线程中完成;选择整个目录时,则对目录下的文件逐个同步处理。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Word;

namespace WindowsFormsApp2
{
    /// <summary>
    /// 主窗体类(用于获取word文档中的文字和图片)
    /// </summary>
    public partial class frmMain : Form
    {
        /// <summary>
        /// Background worker used to read a document's content asynchronously.
        /// </summary>
        private BackgroundWorker _readDocWorker = null;

        /// <summary>
        /// Path of the document currently being processed.
        /// </summary>
        private string _docPath = string.Empty;
        // Directory most recently picked in the folder dialog.
        private string _dirPath = string.Empty;

        /// <summary>
        /// Absolute paths of the files found in the selected directory.
        /// </summary>
        List<string> ListOfName = new List<string>();

 


        /// <summary>
        /// 窗体加载事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void FrmMain_Load(object sender, EventArgs e)
        {
            _readDocWorker = new BackgroundWorker();
            _readDocWorker.DoWork += _readDocWorker_DoWork;
            _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;             
        }


        /// <summary>
        /// 测试多线程处理
        /// </summary>
        /// <param name="filename"></param>
        private void ReadDoc(string filename)
        {
            if (File.Exists(filename))
            {
                BackgroundWorker _readDocWorker = new BackgroundWorker();
               
                _readDocWorker.DoWork += _readDocWorker_DoWork;
                _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
               
                _readDocWorker.RunWorkerAsync(filename);
            }

        }
        /// <summary>
        /// 选择文档按钮点击事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnChooseFile_Click(object sender, EventArgs e)
        {
            var openfile = new OpenFileDialog();

            openfile.Filter = "文档(*.doc;*.docx)|*.doc;*.docx";
            openfile.Title = "请选择文档";

            if (openfile.ShowDialog() == DialogResult.OK)
            {
                _docPath = openfile.FileName;
                this.richTxtBox.Text = "正在加载。。。";
                this.btnChooseFile.Enabled = false;

                _readDocWorker.RunWorkerAsync();
            }
            else
            {
                this.richTxtBox.Text = "请选择文档";
            }
        }

        /// <summary>
        /// 读取文档内容事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            
            var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

            //从文档中同步提取图片

            UtilsDocument.GetWordImageSync(_docPath, imgName);

            //读取文档中的文本内容

            var content = ReadWPSContent(_docPath);

            if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
            {
                StringBuilder sb = new StringBuilder(content);
                var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                StreamWriter sw = new StreamWriter(fs);

                sw.Write(content);

                sw.Close();
                fs.Close();
            }

            e.Result = content;

            Thread.Sleep(10000);
        }

        /// <summary>
        /// 读取文档内容完成事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            var content = (string)e.Result;

            if (!string.IsNullOrEmpty(content))
            {
                this.richTxtBox.Text = content;

                #region 显示进度
                try
                {
                    int i = Int16.Parse(progressBar.Tag.ToString());
                   
                    if (i != 0)
                    {
                        progressBar.Value += 100 / i;

                        num_lbl.Text = progressBar.Value.ToString();
                    }
                    else
                    {
                        num_lbl.Text = "0/0";
                    }
                }
                catch (Exception ee)
                {

                }
                #endregion

            }
            else
            {
                this.richTxtBox.Text = "读取失败";
            }

            this.btnChooseFile.Enabled = true;
        }

        /// <summary>
        /// 构造函数
        /// </summary>
        public frmMain()
        {
            InitializeComponent();

            this.Load += FrmMain_Load;
        }

        /// <summary>
        /// 读取WPS文档的内容(这里用的是WPS的API)
        /// </summary>
        /// <param name="docPath"></param>
        private string ReadWPSContent(string docPath)
        {
            //定义Word实例和文档实例

            var word = new Word.Application();
            var doc = new Word.Document();
            var txtContent = string.Empty;

            try
            {
                //设置打开文档的参数,这里是只读打开

                object name = docPath;
                object Range = System.Reflection.Missing.Value;
                object unknow = Type.Missing;
                object isReadOnly = true;

                //打开给定目录的文档

                word.Visible = false;

                doc = word.Documents.Open(ref name, ref unknow, ref isReadOnly, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow);

                //全选文档中的数据并复制到剪切板

                doc.ActiveWindow.Selection.WholeStory();
                doc.ActiveWindow.Selection.Copy();

                //获取当前剪贴板上的数据

                IDataObject data = null;

                if (this.InvokeRequired)
                {
                    this.Invoke((Action)delegate
                    {
                        data = Clipboard.GetDataObject();
                    });
                }
                else
                {
                    data = Clipboard.GetDataObject();
                }

                if (data != null)
                {
                    //获取文本类型数据

                    if (data.GetDataPresent(DataFormats.Text))
                    {
                        txtContent = (string)data.GetData(DataFormats.Text);
                    }
                    else
                    {
                        txtContent = string.Empty;
                    }
                }
                else
                {
                    txtContent = string.Empty;
                }
            }
            catch (Exception exc)
            {
                txtContent = string.Empty;
            }
            finally
            {
                if (doc != null)
                {
                    doc.Close();
                    doc = null;
                }

                if (word != null)
                {
                    word.Quit();
                    word = null;
                }
            }

            return txtContent;
        }

        /// <summary>
        /// 获取目录下的文件名称按钮
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnDirectChoose_Click(object sender, EventArgs e)
        {
            FolderBrowserDialog fbd = new FolderBrowserDialog();
            
            fbd.SelectedPath = "D:\\003、历史项目\\219、业绩考评系统\\项目文档\\2019、2020年司法档案工作\\2019-完整数据";

            fbd.SelectedPath = "C:\\Users\\HUAWEI\\Desktop\\test";
            DialogResult result = fbd.ShowDialog();
            progressBar.Tag = 0;

            if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath))
            {
                _dirPath = fbd.SelectedPath;
                string[] files = GetFilename(_dirPath);
                if (null != files)
                {
                    for(int i = 0; i < files.Length; i++)
                    {
                        FileListBox.Items.Add(files[i]);
                        //ReadDoc(files[i]);
                        //break;
                    }
                }
                progressBar.Tag = files.Length;
                
            }

            conversion();

            //MessageBox.Show(_dirPath);
        }
        
        /// <summary>
        /// 获取目录下的所有的文件列表
        /// </summary>
        /// <param name="_dirPath"></param>
        /// <returns></returns>
        private string [] GetFilename (string _dirPath)
        {
            string[] files =null;
            
            DirectoryInfo dire = new DirectoryInfo(_dirPath);
            FileInfo[] fileinfo = dire.GetFiles();
            
            files = new string[fileinfo.Length];

            for (int i = 0; i < fileinfo.Length; i++)
            {
                files[i]=fileinfo[i].FullName;
                ListOfName.Add(fileinfo[i].FullName);
            }
            return files;
        }

        /// <summary>
        /// 批量转换
        /// </summary>
        private void conversion()
        {
            for (int i = 0; i < ListOfName.Count; i++)
            {
                _docPath = ListOfName[i];

                /*
                if (i == 0)
                {
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                else
                {
                    _readDocWorker = null;
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                


                _readDocWorker.RunWorkerAsync();
                */

                var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);

                var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

                //从文档中同步提取图片

                UtilsDocument.GetWordImageSync(_docPath, imgName);

                //读取文档中的文本内容

                var content = ReadWPSContent(_docPath);

                if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
                {
                    StringBuilder sb = new StringBuilder(content);
                    var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                    FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                    StreamWriter sw = new StreamWriter(fs);

                    sw.Write(content);

                    sw.Close();
                    fs.Close();
                }


            }
                
        }

    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值