1- Keyed State案例
以 WordCount 的 sum 所使用的 StreamGroupedReduce 类为例,讲解如何在代码中使用 Keyed State。
需求:使用KeyedState中的ValueState获取数据中的最大值(实际中直接使用maxBy即可)
用户自己管理KeyedState,存储Key的状态值,步骤如下:
//-1.定义一个状态用来存放最大值
private transient ValueState<Long> maxValueState;
//-2.创建一个状态描述符对象
ValueStateDescriptor<Long> maxValueStateDescriptor = new ValueStateDescriptor<>("maxValueState", Long.class);
//-3.根据状态描述符获取State
maxValueState = getRuntimeContext().getState(maxValueStateDescriptor);
//-4.使用State
Long historyValue = maxValueState.value();
//判断当前值和历史值谁大
if (historyValue == null || currentValue > historyValue)
//-5.更新状态
maxValueState.update(currentValue);
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
* @author liu a fu
* @version 1.0
* @date 2021/3/9 0009
* @DESC Flink State 中KeyedState,默认情况下框架自己维护,此外可以手动维护
*/
public class StreamKeyedStateDemo {
public static void main(String[] args) throws Exception {
//1-环境准备
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1); //全局并行度设置
//2-数据源source
DataStreamSource<Tuple3<String, String, Long>> tupleStream = env.fromElements(
Tuple3.of("上海", "普陀区", 488L), Tuple3.of("上海", "徐汇区", 212L),
Tuple3.of(