flink 同步kafka的数据到hbase_kafka flink hbase-CSDN博客

本文链接：https://blog.csdn.net/wuchongyong/article/details/130743631

该文章展示了一个使用 Apache Flink 将数据从 Kafka 流式同步到 HBase 的示例：通过 FlinkKafkaConsumer 读取 Kafka 主题，将每条数据转换为 HBase 行（Row），再用自定义的 HBaseSink 将数据写入 HBase 表。代码中包含了 Kafka 和 HBase 的配置以及连接管理。HBaseSink 使用 HBase Java API 逐条写入数据，文中提到可以优化为使用 Bulk Load API 以提升性能。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Flink 是一个分布式数据处理框架，Kafka 是一个高性能的消息队列，HBase 是一个分布式高可用的 NoSQL 数据库。Flink 可以很好地集成 Kafka 和 HBase，实现从 Kafka 同步数据到 HBase。

下面是一个简单的示例代码，演示如何使用 Flink 将 Kafka 中的数据同步到 HBase 中：

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
// NOTE(review): KafkaSink and KafkaTopicPartition appear to live in other packages in the Flink
// Kafka connector and are unused here -- confirm against the connector version before building.
import org.apache.flink.streaming.connectors.kafka.KafkaSink;
import org.apache.flink.streaming.connectors.kafka.KafkaTopicPartition;
import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema;
import org.apache.flink.types.Row;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.Objects;
import java.util.Properties;

/**
 * Example Flink job that reads string records from the Kafka topic {@code input} and writes
 * them to an HBase table through {@code HBaseSink}.
 *
 * <p>Every record is mapped to a four-field {@link Row}: row key, column family, column
 * qualifier and cell value, in that order. This is the layout {@code HBaseSink.invoke} reads.
 */
public class KafkaToHBase {
    /** Number of fields in each emitted Row: row key, family, qualifier, value. */
    private static final int HBASE_ROW_ARITY = 4;

    // Placeholder coordinates for the example. With a constant row key every record overwrites
    // the same cell. TODO: derive the row key from the record in a real job.
    private static final String ROW_KEY = "rowkey";
    private static final String COLUMN_FAMILY = "cf";
    private static final String QUALIFIER = "qualifier";

    /**
     * Builds the Kafka-to-HBase pipeline and runs it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka consumer configuration.
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "localhost:9092");
        kafkaProps.setProperty("group.id", "test");

        // Kafka source: each record value is decoded as a String.
        FlinkKafkaConsumer<String> kafkaConsumer =
                new FlinkKafkaConsumer<>("input", new SimpleStringSchema(), kafkaProps);
        DataStream<String> kafkaDataStream = env.addSource(kafkaConsumer);

        // Convert each Kafka record into the four-field Row the sink expects.
        DataStream<Row> hbaseDataStream = kafkaDataStream.map(new MapFunction<String, Row>() {
            @Override
            public Row map(String value) throws Exception {
                Row row = new Row(HBASE_ROW_ARITY);
                row.setField(0, ROW_KEY);
                row.setField(1, COLUMN_FAMILY);
                row.setField(2, QUALIFIER);
                // Store the actual Kafka payload as the cell value instead of a fixed literal.
                row.setField(3, value);
                return row;
            }
        });

        // HBase connection settings handed to the sink.
        Properties hbaseProps = new Properties();
        hbaseProps.setProperty("hbase.zookeeper.quorum", "localhost:2181");
        hbaseProps.setProperty("zookeeper.znode.parent", "/hbase-unsecure");

        // Sink that writes each Row to the target HBase table.
        HBaseSink hbaseSink = new HBaseSink(hbaseProps, "tableName");
        hbaseDataStream.addSink(hbaseSink);

        env.execute("KafkaToHBase");
    }
}

// HBase Sink
class HBaseSink extends RichSinkFunction<Row> {
    private transient Connection connection;
    private transient Table table;
    private final Properties props;
    private final String tableName;

    public HBaseSink(Properties props, String tableName) {
        this.props = props;
        this.tableName = tableName;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", props.getProperty("hbase.zookeeper.quorum"));
        config.set("zookeeper.znode.parent", props.getProperty("zookeeper.znode.parent"));
        connection = ConnectionFactory.createConnection(config);
        table = connection.getTable(TableName.valueOf(tableName));
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (table != null) {
            table.close();
        }
        if (connection != null) {
            connection.close();
        }
    }

    @Override
    public void invoke(Row row, Context context) throws Exception {
        Put put = new Put(Bytes.toBytes(row.getField(0).toString()));
        put.addColumn(Bytes.toBytes(row.getField(1).toString()), Bytes.toBytes(row.getField(2).toString()), Bytes.toBytes(row.getField(3).toString()));
        table.put(put);
    }
}