内存映射文件与大文件高效读取策略-CSDN博客

本文链接：https://blog.csdn.net/super_ccc/article/details/50215049

package com.zhongan.core.policy.biz.util;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;

import javax.annotation.Resource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.zhongan.core.policy.biz.manager.IFileManager;

/**
 * Reads a text file line by line through read-only memory mappings and passes every line to the
 * injected {@link IFileManager}.
 * <p>
 * Lines are split on the line-feed byte (10) only; a carriage return preceding it, if any, is left
 * in the line that is handed to the file manager.
 */
public class MapFileToMemory {

    private static final Logger log = LoggerFactory.getLogger(MapFileToMemory.class);

    /** File that is read when no {@link #setPath(String) path} has been configured. */
    private static final String DEFAULT_PATH = "E:\\生产问题查询\\policy_biz_id_2015.dat";

    /** Byte value of the line-feed character that terminates a line. */
    private static final byte   LINE_FEED    = 10;

    /**
     * Largest region mapped at once. {@link FileChannel#map} rejects sizes above
     * {@link Integer#MAX_VALUE}, so bigger files are walked in windows of this size.
     */
    private static final long   MAX_MAP_BYTES = 1L << 30;

    private String              path;
    @Resource
    private IFileManager        fileManager;

    /**
     * Command-line entry point.
     * <p>
     * NOTE(review): no {@link IFileManager} is wired in here, so {@link #parseFile()} fails fast
     * with an {@link IllegalStateException} as soon as the input file exists; run the bean from
     * the container to actually process a file.
     *
     * @param args optional; when present, {@code args[0]} is used as the file path
     * @throws IOException declared by {@link #parseFile()}
     */
    public static void main(String[] args) throws IOException {
        MapFileToMemory mf = new MapFileToMemory();
        if (args != null && args.length > 0) {
            mf.setPath(args[0]);
        }
        mf.parseFile();
    }

    /**
     * Processes the configured file sequentially, one line at a time, on the calling thread.
     * <p>
     * The file is the configured {@link #getPath() path}, or {@link #DEFAULT_PATH} when no path is
     * set. If the file does not exist an error is logged and the method returns. I/O failures
     * while reading are logged (with their cause) and not rethrown, matching the previous
     * behaviour. A final line without a trailing line feed is processed as well.
     *
     * @throws IOException kept in the signature for backward compatibility
     * @throws IllegalStateException if the file exists but no {@link IFileManager} was injected
     */
    public void parseFile() throws IOException {
        String filePath = (path == null || path.isEmpty()) ? DEFAULT_PATH : path;

        log.warn("---------------update jhs policy task start--------------filepath={}", filePath);

        long start = System.currentTimeMillis();

        File file = new File(filePath);
        if (!file.exists()) {
            log.error("文件不存在parseFile()=======filePath-->{}", filePath);
            return;
        }
        if (fileManager == null) {
            // Fail with a clear message instead of a NullPointerException on the first line.
            throw new IllegalStateException("fileManager has not been injected");
        }

        // try-with-resources closes the channel and the file even when processing fails.
        try (RandomAccessFile input = new RandomAccessFile(file, "r");
                FileChannel channel = input.getChannel()) {
            long length = channel.size();
            // Bytes of the line currently being assembled; survives across mapping windows so a
            // line that straddles two windows is still delivered in one piece.
            ByteArrayOutputStream lineBytes = new ByteArrayOutputStream();

            long offset = 0;
            while (offset < length) {
                long windowSize = Math.min(length - offset, MAX_MAP_BYTES);
                MappedByteBuffer window = channel.map(FileChannel.MapMode.READ_ONLY, offset, windowSize);
                while (window.hasRemaining()) {
                    byte current = window.get();
                    if (current == LINE_FEED) {
                        handleLine(lineBytes);
                    } else {
                        lineBytes.write(current);
                    }
                }
                offset += windowSize;
            }

            // Last line of a file that does not end with a line feed.
            if (lineBytes.size() > 0) {
                handleLine(lineBytes);
            }
        } catch (FileNotFoundException e) {
            log.error("cannot find the file" + file, e);
        } catch (IOException e) {
            log.error("IOException", e);
        } finally {
            log.warn("处理文件时间(parseFile):{}", (System.currentTimeMillis() - start) / 1000);
            log.warn("---------------update jhs policy task end--------------");
        }
    }

    /**
     * Decodes the collected bytes into one line, clears the collector and forwards the line to the
     * file manager.
     * <p>
     * NOTE(review): the bytes are decoded as UTF-8, which is an assumption about the input file;
     * pure ASCII content is unaffected. Confirm the real encoding of the data files.
     *
     * @param lineBytes raw bytes of one line, without the terminating line feed
     * @throws IOException declared so that a checked I/O failure from the file manager, if it
     *         declares one, still reaches the handler in {@link #parseFile()}
     */
    private void handleLine(ByteArrayOutputStream lineBytes) throws IOException {
        String line = new String(lineBytes.toByteArray(), StandardCharsets.UTF_8);
        lineBytes.reset();
        log.debug("{}", line);
        fileManager.excuteContent(line);
    }

    /**
     * @return the configured file path, or {@code null} when none has been set
     */
    public String getPath() {
        return path;
    }

    /**
     * @param path the path of the file to process
     */
    public void setPath(String path) {
        this.path = path;
    }

}

上面就是利用内存映射按行读取文件的代码。经测试，600M 的文件需要 10min；这种方式内存消耗较小（此处消耗内存 10-20M），适合读取较大的文件。当然，一般情况下读文件的速度都是比较快的，慢的话往往只是处理文件内容比较慢。如果你的文件每行之间并没有什么依赖关系，可以用以下两种方式处理：

1.在此bean中注入一个线程池，利用多线程来处理读取的内容

2.将读出的内容丢到消息队列，然后再由其他程序来消费队列中的消息

这样就能有效提高程序的速度。

内存映射文件：“

传统的文件 I/O 是通过用户进程发布 read( )和 write( )系统调用来传输数据的。为了在内核空间的文件系统页与用户空间的内存区之间移动数据，一次以上的拷贝操作几乎总是免不了的。这是因为，在文件系统页与用户缓冲区之间往往没有一一对应关系。但是，还有一种大多数操作系统都支持的特殊类型的 I/O 操作，允许用户进程最大限度地利用面向页的系统 I/O 特性，并完全摒弃缓冲区拷贝。这就是内存映射 I/O，如图 1-6 所示。

内存映射 I/O 使用文件系统建立从用户空间直到可用文件系统页的虚拟内存映射。这样做有几个好处：
• 用户进程把文件数据当作内存，所以无需发布 read( )或 write( )系统调用。
• 当用户进程碰触到映射内存空间，页错误会自动产生，从而将文件数据从磁盘读进内存。如果用户修改了映射内存空间，相关页会自动标记为脏，随后刷新到磁盘，文件得到更新。
• 操作系统的虚拟内存子系统会对页进行智能高速缓存，自动根据系统负载进行内存管理。
• 数据总是按页对齐的，无需执行缓冲区拷贝。
• 大型文件使用映射，无需耗费大量内存，即可进行数据拷贝。

虚拟内存和磁盘 I/O 是紧密关联的，从很多方面看来，它们只是同一件事物的两面。在处理大量数据时，尤其要记得这一点。如果数据缓冲区是按页对齐的，且大小是内建页大小的倍数，那么，对大多数操作系统而言，其处理效率会大幅提升。”