Flink学习之 - map与flatMap

本文介绍了如何使用Apache Flink进行实时数据流处理,从Kafka消费数据,筛选billCode以'test'开头的数据,然后加入当前时间戳,并处理成JSONObject。最后演示了如何将数据写入HBase并展示数据转换过程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >


import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import org.apache.hadoop.hbase.client.Connection;
import java.util.Map;

public class MyFlinkTest {
    public static void main(String[] args) throws Exception {
      
        StreamExecutionEnvironment env = FlinkUtils.getStreamExecutionEnvironment();
        env.setParallelism(10);

        DataStreamSource<String> kafkaStream = env
                .addSource(KafkaUtil.getFlinkKafkaConsumer("dwd_table", "dwdTableTest"));

        kafkaStream.rebalance().map(new MapFunction<String, Object>() {
            private static final long serialVersionUID = 1L;
            public String map(String value){
                System.out.println("src value:"+value);
                return null;
            }
        }).uid("src.print").name("src.print.name").setParallelism(1);

        //只取billCode为test开头的数据
        SingleOutputStreamOperator<JSONObject> processStream = dataProcess(kafkaStream);

        //增加当前时间字段
        SingleOutputStreamOperator<JSONObject>  curTimeStream = curTimeDataProcess(kafkaStream);

        curTimeStream.map(new MapFunction<JSONObject,Object>() {
            @Override
            public Object map(JSONObject jsonObject) throws Exception {
                System.out.println("json value:"+jsonObject);
                return null;
            }
        }).uid("json.print").name("json.print.name").setParallelism(2);

        env.execute("dwdTableTest");

    }

    private static SingleOutputStreamOperator<JSONObject> dataProcess(DataStreamSource<String> kafkaStream) {
        return kafkaStream.flatMap(new FlatMapFunction<String, JSONObject>() {
            @Override
            public void flatMap(String value, Collector<JSONObject> collector) throws Exception {
                JSONObject source = JSON.parseObject(value);
                if(source.getString("billCode").startsWith("test")){
                    collector.collect(source);
                }
            }
        }).uid("billCode.test").name("billCode.test.name").setParallelism(4);
    }


    private static SingleOutputStreamOperator<JSONObject> curTimeDataProcess(DataStreamSource<String> kafkaStream) {
        return kafkaStream.map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String s) throws Exception {
                JSONObject source = JSON.parseObject(s);
                source.put("curTime", DateUtils.getLocalDateTimeStr());
                return source;
            }
        }).uid("curTime.test").name("curTime.test.name").setParallelism(4);
    }


    private static SingleOutputStreamOperator<MyTestDTO> richMapProcess1(DataStreamSource<String> kafkaStream) {
        return kafkaStream.map(new RichMapFunction<String, MyTestDTO>() {

            private Connection conn;

            @Override
            public void open(Configuration parameters) throws Exception {
                conn = HbaseUtils.getConn();
            }

            @Override
            public void close() throws Exception {
                HbaseUtils.close(conn);
            }

            @Override
            public MyTestDTO map(String s) throws Exception {
                MyTestDTO myTestDTO = new MyTestDTO();
                myTestDTO.setDate("2021-10-27");
                myTestDTO.setWeight(1);
                return myTestDTO;
            }

        }).uid("richMapProcess1").setParallelism(4);
    }

    private static SingleOutputStreamOperator<String> richMapProcess2(DataStreamSource<JSONObject> kafkaStream) {
        return kafkaStream.map(new RichMapFunction<JSONObject, String>() {

            @Override
            public String map(JSONObject jSONObject) throws Exception {
                return jSONObject.toJSONString();
            }

        }).uid("richMapProcess2").setParallelism(4);
    }

    private static SingleOutputStreamOperator<MyTestDTO> richMapProcess3(DataStreamSource<Map<String,String>> kafkaStream) {
        return kafkaStream.map(new RichMapFunction<Map<String, String>, MyTestDTO>() {

            @Override
            public MyTestDTO map(Map<String, String> stringStringMap) throws Exception {
                return null;
            }

        }).uid("richMapProcess3").setParallelism(4);
    }

}
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

// Simple DTO used as the output element type of the RichMapFunction examples.
// Lombok generates getters/setters, equals/hashCode and toString (@Data),
// plus a no-arg and an all-args constructor.
// NOTE(review): this is a second top-level public class pasted into the same
// listing; in a real project it must live in its own MyTestDTO.java file.
@Data
@NoArgsConstructor
@AllArgsConstructor
public class MyTestDTO {
    // Date string such as "2021-10-27" (see richMapProcess1); presumably a
    // business date — consider java.time.LocalDate. TODO confirm format.
    private String date;
    // Weight value; set from an int literal upstream, widened to double.
    private double weight;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值