DORIS Data Backup (HDFS)
Install HADOOP
Install HADOOP ahead of time: set up a single-node HADOOP cluster for temporary use.
Reference: https://blog.csdn.net/taoruicheng1/article/details/135114606
Related image:
docker pull apache/hadoop:3.3.5
Install docker compose
wget https://github.com/docker/compose/releases/download/v2.16.0/docker-compose-linux-x86_64
cp docker-compose-linux-x86_64 /usr/local/bin/docker-compose
chmod +x /usr/local/bin/docker-compose
docker-compose --version
Configuration: cat docker-compose.yaml
(In this example the services use the local host network: network_mode: host)
version: "3"
services:
  namenode:
    image: your_harbor/apache/hadoop:3.3.5
    hostname: namenode
    command: ["hdfs", "namenode"]
    user: "root:root"
    ports:
      - 9870:9870
      - 8020:8020
    volumes:
      - namenode:/tmp/hadoop-root/dfs
    env_file:
      - ./config.env
    privileged: true
    environment:
      ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name"
    network_mode: host
  datanode:
    image: your_harbor/apache/hadoop:3.3.5
    hostname: datanode
    command: ["hdfs", "datanode"]
    user: "root:root"
    env_file:
      - ./config.env
    privileged: true
    ports:
      - 9864:9864
      - 9866:9866
    volumes:
      - datanode:/tmp/hadoop-root/dfs
    network_mode: host
  resourcemanager:
    image: your_harbor/apache/hadoop:3.3.5
    hostname: resourcemanager
    command: ["yarn", "resourcemanager"]
    user: "root:root"
    ports:
      - 8088:8088
      - 8030:8030
      - 8031:8031
      - 8032:8032
      - 8033:8033
    env_file:
      - ./config.env
    volumes:
      - ./test.sh:/opt/test.sh
    network_mode: host
  nodemanager:
    image: your_harbor/apache/hadoop:3.3.5
    command: ["yarn", "nodemanager"]
    user: "root:root"
    env_file:
      - ./config.env
    ports:
      - 8042:8042
    network_mode: host
volumes:
  datanode:
  namenode:
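Before bringing anything up, it is worth validating the file; a minimal sketch using docker-compose config, which prints the fully resolved configuration and fails fast on YAML mistakes. Note that under network_mode: host the services bind directly to the host's network stack, so Compose discards the ports: mappings above (they remain useful as documentation of which ports each service uses).
# Sanity-check the compose file; prints the resolved config or an error
docker-compose config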
Configure config.env
Note the fs.default.name and fs.defaultFS settings; Doris needs them when creating the backup repository.
CORE-SITE.XML_fs.default.name=hdfs://namenode
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
CORE-SITE.XML_hadoop.http.staticuser.user=root
CORE-SITE.XML_hadoop.tmp.dir=/tmp/hadoop-root
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
HDFS-SITE.XML_dfs.replication=1
MAPRED-SITE.XML_mapreduce.framework.name=yarn
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=${HADOOP_HOME}
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=${HADOOP_HOME}
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=${HADOOP_HOME}
MAPRED-SITE.XML_mapreduce.jobhistory.address=0.0.0.0:10020
MAPRED-SITE.XML_mapreduce.jobhistory.webapp.address=0.0.0.0:19888
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=true
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=true
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
YARN-SITE.XML_yarn.nodemanager.resource.cpu-vcores=4
YARN-SITE.XML_yarn.application.classpath=/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/*:/opt/hadoop/share/hadoop/hdfs/*:/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/yarn:/opt/hadoop/share/hadoop/yarn/lib/*:/opt/hadoop/share/hadoop/yarn/*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false
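The image's entrypoint turns each FILE.XML_key=value entry in config.env into a property in the corresponding config file under /opt/hadoop/etc/hadoop. A quick sketch to verify the generated file once the containers are up, assuming the compose project is named hadoop so the container is hadoop-namenode-1 (matching the log commands further down):
# Show the core-site.xml generated from the CORE-SITE.XML_* entries above
docker exec hadoop-namenode-1 cat /opt/hadoop/etc/hadoop/core-site.xml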
Touch the test file referenced by the resourcemanager volume mount
touch test.sh
Start HADOOP
# start
docker-compose up -d
docker-compose ps
# shut down (add --rmi all to also remove the images)
docker-compose down
docker-compose ps
View the logs
docker logs -f hadoop-namenode-1
docker logs -f hadoop-datanode-1
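Beyond the logs, a sketch for confirming the datanode actually registered with the namenode (again assuming the hadoop-namenode-1 container name from above):
# Report cluster capacity and live datanodes; expect 1 live datanode
docker exec hadoop-namenode-1 hdfs dfsadmin -report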
You can also test that basic operations work:
# -put: copy from the local filesystem to HDFS, where /xxx/xxx/ is a path in HDFS
hdfs dfs -put bootstrap.sh /doris
# -copyFromLocal: copy from the local filesystem to HDFS; equivalent to -put
hdfs dfs -copyFromLocal b.txt /usr/opt/data
# -moveFromLocal: move from the local filesystem to HDFS; the local file is removed once the command completes
hdfs dfs -moveFromLocal c.txt /usr/opt/data
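The Doris repository created later points at hdfs://namenode/doris/hdfs_repo/; a minimal sketch to pre-create and inspect that path (run wherever the hdfs client is configured, e.g. inside the namenode container):
# Create the directory the Doris repository will use, then list it
hdfs dfs -mkdir -p /doris/hdfs_repo
hdfs dfs -ls /doris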
Configure hosts
Point these entries at your HADOOP machine, and make sure every DORIS fe/be node can resolve these hostnames.
cat /etc/hosts
echo "
192.168.12.123 datanode
192.168.12.123 namenode
192.168.12.123 resourcemanager
" >> /etc/hosts
cat /etc/hosts
Access the web UI
9870: the Namenode web UI port
http://192.168.12.123:9870
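If a browser is not handy, the namenode's WebHDFS REST API on the same port gives a quick health check (WebHDFS is enabled by default; host and port are from this setup):
# List the HDFS root via WebHDFS; a JSON FileStatuses response means the namenode is serving requests
curl "http://192.168.12.123:9870/webhdfs/v1/?op=LISTSTATUS"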
Configure the DORIS repository
Log in to the mysql client as root
mysql -uroot -P9030 -h 127.0.0.1
Create the backup repository
Note the fs.defaultFS and fs.default.name settings.
The HADOOP cluster is single-node for now, so set dfs.replication to 1 first to avoid errors like the following:
could only be written to 0 of the 1 minReplication nodes. There are 3 datanode(s) running and 3 node(s) are excluded in this operation.
CREATE REPOSITORY hdfs_repo
WITH hdfs
ON LOCATION "hdfs://namenode/doris/hdfs_repo/"
PROPERTIES
(
"fs.defaultFS" = "hdfs://namenode",
"fs.default.name" = "hdfs://namenode",
"hadoop.username" = "root",
"dfs.replication" = "1",
"dfs.client.use.datanode.hostname" = "true",
"dfs.client.use.namenode.hostname" = "true"
);
SHOW CREATE REPOSITORY for hdfs_repo;
Back up the database
Full-database backup of your_db_name
BACKUP SNAPSHOT your_db_name.tag_date_202501
TO hdfs_repo;
SHOW BACKUP\G
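BACKUP can also snapshot just part of a database via an ON clause; a sketch with placeholder table names (table1 and table2 are hypothetical):
-- Back up only selected tables instead of the whole database
-- (table1 and table2 are placeholder names)
BACKUP SNAPSHOT your_db_name.tag_tables_202501
TO hdfs_repo
ON (table1, table2);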
If anything goes wrong, cancel and delete, then start over
CANCEL BACKUP FROM your_db_name;
DROP REPOSITORY hdfs_repo;
SHOW REPOSITORIES;
View the backed-up snapshots
SHOW SNAPSHOT ON hdfs_repo;
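To find the backup_timestamp needed by the RESTORE below, filter the snapshot list by name; a sketch:
-- Show details (including the Timestamp column) for one specific snapshot
SHOW SNAPSHOT ON hdfs_repo WHERE SNAPSHOT = "tag_date_202501";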
Restore data
(You can back up on cluster A and restore on cluster B: create the same hdfs_repo repository on cluster B, and the snapshots backed up from A become visible there.)
RESTORE SNAPSHOT your_db_name.tag_date_202501
FROM hdfs_repo
PROPERTIES
(
"backup_timestamp"="2025-01-25-06-31-09", //通过查询快照,可以看到这个时间戳
"replication_num" = "1"
);
SHOW RESTORE FROM your_db_name\G
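RESTORE also accepts an ON clause to bring back only selected tables, optionally renaming them; a sketch with placeholder names (table1 and table1_restored are hypothetical):
-- Restore a single table from the snapshot under a new name
-- (table1 and table1_restored are placeholder names)
RESTORE SNAPSHOT your_db_name.tag_date_202501
FROM hdfs_repo
ON (table1 AS table1_restored)
PROPERTIES
(
    "backup_timestamp" = "2025-01-25-06-31-09",
    "replication_num" = "1"
);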