A utility class for operating the HDFS file system remotely through the Java API
First, add the HDFS-related Maven dependencies:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.1</version>
</dependency>
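The utility class below derives the target cluster from the URI of each path, so callers pass fully qualified paths such as hdfs://<namenode-host>:<port>/dir/file. As a quick orientation, here is a minimal sketch of that connection pattern; the class name, the NameNode address 192.168.1.100:9000, and the user name hadoop are placeholder assumptions, not values from the original code:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSConnectionDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder NameNode address; replace with your own cluster.
        Path path = new Path("hdfs://192.168.1.100:9000/tmp");
        Configuration conf = new Configuration();
        // Connecting as an explicit user avoids AccessControlException when the
        // local OS user name differs from the HDFS user that owns the files.
        FileSystem fs = FileSystem.get(path.toUri(), conf, "hadoop");
        System.out.println("/tmp exists: " + fs.exists(path));
        fs.close();
    }
}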
The utility class code is as follows:
package com.huatec.demo.HDFS.utils;

import com.huatec.demo.HDFS.entity.HDFSFileEntity;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.io.IOUtils;

import java.io.*;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * @author houwenjie
 * @date 2018/9/8 10:57
 * @description Utility class for operating HDFS remotely
 */
public class HDFSIOUtil {

    private static Configuration conf = new Configuration();
    private static FileSystem fs = null;
    // 24-hour format; note that SimpleDateFormat is not thread-safe.
    private static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");

    private HDFSIOUtil() {
    }
    /**
     * @author houwenjie
     * @date 2018/9/3 14:59
     * @description Reads a file from HDFS and returns its contents as a byte array.
     */
    public static byte[] readFile(String filePath, String user) throws IOException, InterruptedException {
        FSDataInputStream in = null;
        Path srcPath = new Path(filePath);
        try {
            fs = FileSystem.get(srcPath.toUri(), conf, user);
            in = fs.open(srcPath);
            byte[] buffer = new byte[10240];
            int readBytes;
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            while ((readBytes = in.read(buffer)) > 0) {
                bos.write(buffer, 0, readBytes);
            }
            return bos.toByteArray();
        } finally {
            IOUtils.closeStream(in);
        }
    }
    /**
     * @author houwenjie
     * @date 2018/9/8 11:21
     * @description Uploads a local file to HDFS; the target path must include the file name.
     */
    public static void copyFileToHDFS(File file, String HDFSPath, String user) throws IOException, InterruptedException {
        Path path = new Path(HDFSPath);
        fs = FileSystem.get(path.toUri(), conf, user);
        InputStream in = new BufferedInputStream(new FileInputStream(file));
        OutputStream out = fs.create(path);
        // The final argument closes both streams when the copy completes.
        IOUtils.copyBytes(in, out, 4096, true);
    }
    /**
     * Lists the files under an HDFS directory.
     *
     * @param path the HDFS directory to list
     * @param user the HDFS user to connect as
     * @return the file entries under the given path
     */
    public static List<HDFSFileEntity> getFileList(Path path, String user) throws IOException, InterruptedException {
        fs = FileSystem.get(path.toUri(), conf, user);
        List<HDFSFileEntity> files = new ArrayList<HDFSFileEntity>();
        if (fs.exists(path) && fs.isDirectory(path)) {
            for (FileStatus status : fs.listStatus(path)) {
                HDFSFileEntity hdfsFileEntity = new HDFSFileEntity();
                hdfsFileEntity.setPermission((status.isDirectory() ? "d" : "-") + status.getPermission());
                hdfsFileEntity.setSize(status.getLen());
                // Use the file's actual modification time rather than the current time.
                hdfsFileEntity.setModification_time(simpleDateFormat.format(new Date(status.getModificationTime())));
                hdfsFileEntity.setPath(status.getPath().toString());
                hdfsFileEntity.setBlock_replication((int) status.getReplication());
                hdfsFileEntity.setOwner(status.getOwner());
                hdfsFileEntity.setGroup(status.getGroup());
                files.add(hdfsFileEntity);
            }
        }
        return files;
    }
    public static boolean mkdir(Path path, String user) throws IOException, InterruptedException {
        fs = FileSystem.get(path.toUri(), conf, user);
        return fs.mkdirs(path);
    }
    /**
     * @author houwenjie
     * @date 2018/9/10 14:34
     * @description Recursively lists all files under the given path.
     */
    public static List<String> getFileSystem(Path path, List<String> files, String user) throws IOException, InterruptedException {
        fs = FileSystem.get(path.toUri(), conf, user);
        if (fs.exists(path) && fs.isDirectory(path)) {
            for (FileStatus status : fs.listStatus(path)) {
                // Use the file's actual modification time rather than the current time.
                files.add((status.isDirectory() ? "d" : "-") + status.getPermission() + "," + status.getLen() + ","
                        + simpleDateFormat.format(new Date(status.getModificationTime())) + "," + status.getPath());
                getFileSystem(status.getPath(), files, user);
            }
        }
        return files;
    }
    /**
     * @author houwenjie
     * @date 2018/9/12 11:21
     * @description Checks whether the given path is a file.
     */
    public static boolean isFile(String HDFSPath, String user) throws IOException, InterruptedException {
        Path path = new Path(HDFSPath);
        fs = FileSystem.get(path.toUri(), conf, user);
        return fs.isFile(path);
    }
    /**
     * @author houwenjie
     * @date 2018/9/12 11:21
     * @description Deletes a file or directory (recursively).
     */
    public static boolean delete(String HDFSPath, String user) throws IOException, InterruptedException {
        Path path = new Path(HDFSPath);
        fs = FileSystem.get(path.toUri(), conf, user);
        return fs.delete(path, true);
    }
    /**
     * @author houwenjie
     * @date 2018/9/12 11:21
     * @description Renames a file.
     */
    public static boolean rename(String oldPath, String newPath, String user) throws IOException, InterruptedException {
        Path oldpath = new Path(oldPath);
        Path newpath = new Path(newPath);
        fs = FileSystem.get(oldpath.toUri(), conf, user);
        return fs.rename(oldpath, newpath);
    }
    /**
     * @author houwenjie
     * @date 2019/5/24 10:24
     * @description Checks whether the given path exists.
     */
    public static boolean isExist(Path HDFSPath, String user) throws IOException, InterruptedException {
        fs = FileSystem.get(HDFSPath.toUri(), conf, user);
        return fs.exists(HDFSPath);
    }
    /**
     * @author houwenjie
     * @date 2018/10/16 13:45
     * @description For an HA cluster, returns the address of the active NameNode, or null if it cannot be determined.
     */
    public static String getActiveNameNode(String HDFSPath, String user) {
        try {
            Path path = new Path(HDFSPath);
            fs = FileSystem.get(path.toUri(), conf, user);
            InetSocketAddress active = HAUtil.getAddressOfActive(fs);
            InetAddress address = active.getAddress();
            return "hdfs://" + address.getHostAddress() + ":" + active.getPort();
        } catch (Exception e) {
            // Not an HA cluster, or the active NameNode could not be resolved.
            return null;
        }
    }
    /**
     * @author houwenjie
     * @date 2018/10/16 14:35
     * @description Copies a file within HDFS.
     */
    public static boolean copy(String oldPath, String newPath, String user) throws IOException, InterruptedException {
        Path path_old = new Path(oldPath);
        Path path_new = new Path(newPath);
        fs = FileSystem.get(path_old.toUri(), conf, user);
        // FileContext reads HADOOP_USER_NAME to decide which user to act as.
        System.setProperty("HADOOP_USER_NAME", user);
        return FileContext.getFileContext().util().copy(path_old, path_new);
    }
}
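For reference, a minimal usage sketch of the utility class; the NameNode address hdfs://192.168.1.100:9000, the user name hadoop, and the local file /tmp/hello.txt are placeholder assumptions:

import java.io.File;
import org.apache.hadoop.fs.Path;

public class HDFSIOUtilDemo {
    public static void main(String[] args) throws Exception {
        String user = "hadoop";                    // placeholder HDFS user
        String base = "hdfs://192.168.1.100:9000"; // placeholder NameNode address

        // Create a working directory and upload a local file into it.
        HDFSIOUtil.mkdir(new Path(base + "/demo"), user);
        HDFSIOUtil.copyFileToHDFS(new File("/tmp/hello.txt"), base + "/demo/hello.txt", user);

        // Read the file back and print its contents.
        byte[] data = HDFSIOUtil.readFile(base + "/demo/hello.txt", user);
        System.out.println(new String(data, "UTF-8"));

        // Rename the file, then remove the whole directory recursively.
        HDFSIOUtil.rename(base + "/demo/hello.txt", base + "/demo/hello-renamed.txt", user);
        HDFSIOUtil.delete(base + "/demo", user);
    }
}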
The HDFSFileEntity entity class:
package com.huatec.demo.HDFS.entity;

/**
 * @author houwenjie
 * @date 2018/10/15 14:08
 * @description Entity describing a file entry in HDFS.
 */
public class HDFSFileEntity {

    private String path;
    private long size;
    private int block_replication;
    private long blocksize;
    private String modification_time;
    private String permission;
    private String owner;
    private String group;

    @Override
    public String toString() {
        return "HDFSFileEntity{" +
                "path='" + path + '\'' +
                ", size=" + size +
                ", block_replication=" + block_replication +
                ", blocksize=" + blocksize +
                ", modification_time='" + modification_time + '\'' +
                ", permission='" + permission + '\'' +
                ", owner='" + owner + '\'' +
                ", group='" + group + '\'' +
                '}';
    }

    public String getPath() {
        return path;
    }

    public void setPath(String path) {
        this.path = path;
    }

    public long getSize() {
        return size;
    }

    public void setSize(long size) {
        this.size = size;
    }

    public int getBlock_replication() {
        return block_replication;
    }

    public void setBlock_replication(int block_replication) {
        this.block_replication = block_replication;
    }

    public long getBlocksize() {
        return blocksize;
    }

    public void setBlocksize(long blocksize) {
        this.blocksize = blocksize;
    }

    public String getModification_time() {
        return modification_time;
    }

    public void setModification_time(String modification_time) {
        this.modification_time = modification_time;
    }

    public String getPermission() {
        return permission;
    }

    public void setPermission(String permission) {
        this.permission = permission;
    }

    public String getOwner() {
        return owner;
    }

    public void setOwner(String owner) {
        this.owner = owner;
    }

    public String getGroup() {
        return group;
    }

    public void setGroup(String group) {
        this.group = group;
    }
}
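Finally, a short sketch of listing a directory with getFileList and printing each HDFSFileEntity via its toString; the address and user name are again placeholders:

import java.util.List;
import org.apache.hadoop.fs.Path;

public class ListFilesDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder NameNode address and HDFS user; replace with your own.
        Path dir = new Path("hdfs://192.168.1.100:9000/demo");
        List<HDFSFileEntity> entries = HDFSIOUtil.getFileList(dir, "hadoop");
        for (HDFSFileEntity entry : entries) {
            // toString prints path, size, replication, block size, owner, group, etc.
            System.out.println(entry);
        }
    }
}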