Flink的窗口模型允许除了WindowAssigner和Trigger之外还指定一个可选的Evictor。可以使用evictor(…)方法来完成此操作。Evictor可以在Trigger触发后,应用Window Function之前或之后从窗口中删除元素。
源码:
public interface Evictor<T, W extends Window> extends Serializable {
/**
* Optionally evicts elements. Called before the windowing function is invoked.
*
* @param elements All elements currently in the window.
* @param size The current number of elements in the window.
* @param window The window object.
* @param evictorContext The context for the Evictor
*/
void evictBefore(Iterable<TimestampedValue<T>> elements, int size, W window, EvictorContext evictorContext);
/**
* Optionally evicts elements. Called after the windowing function has been invoked.
*
* @param elements All elements currently in the window.
* @param size The current number of elements in the window.
* @param window The window object.
* @param evictorContext The context for the Evictor
*/
void evictAfter(Iterable<TimestampedValue<T>> elements, int size, W window, EvictorContext evictorContext);
}
pre-implemented evictors
:
CountEvictor:从窗口中保留用户指定数量的元素,并从窗口缓冲区的开头丢弃其余的元素。
部分源码剖析:
/**
 * Removes elements from the front of the window buffer until only {@code maxCount} remain.
 *
 * @param elements the elements currently held by the window
 * @param size the number of elements currently held by the window
 * @param ctx the evictor context (not used by this method)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size, EvictorContext ctx) {
    // Nothing to do while the window holds no more than maxCount elements.
    if (size <= maxCount) {
        return;
    }

    // Number of leading elements that have to be dropped.
    final long surplus = size - maxCount;
    final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    long visited = 0;
    while (cursor.hasNext()) {
        cursor.next();
        visited++;
        if (visited > surplus) {
            // Enough elements have been dropped; everything from here on is kept.
            break;
        }
        cursor.remove();
    }
}
DeltaEvictor
:
获取一个DeltaFunction和一个阈值,计算窗口缓冲区中最后一个元素和每个剩余元素之间的增量,并删除增量大于或等于阈值的元素。
部分源码剖析:
/**
 * Removes every element whose delta to the last element of the window buffer is
 * greater than or equal to the configured threshold.
 *
 * @param elements the elements currently held by the window
 * @param size the number of elements currently held by the window (not used by this method)
 * @param ctx the evictor context (not used by this method)
 */
private void evict(Iterable<TimestampedValue<T>> elements, int size, EvictorContext ctx) {
    // The last element of the buffer is the reference every other element is compared against.
    final TimestampedValue<T> reference = Iterables.getLast(elements);

    final Iterator<TimestampedValue<T>> cursor = elements.iterator();
    while (cursor.hasNext()) {
        final TimestampedValue<T> candidate = cursor.next();
        if (!(deltaFunction.getDelta(candidate.getValue(), reference.getValue()) >= this.threshold)) {
            // Delta stays below the threshold: keep the element.
            continue;
        }
        cursor.remove();
    }
}
TimeEvictor
:
以毫秒为单位的时间间隔interval作为参数,对于给定的窗口,它会在其元素中找到最大时间戳max_ts,并删除时间戳小于或等于max_ts-interval的所有元素。
部分源码剖析:
/**
 * Removes every element whose timestamp is less than or equal to
 * (largest timestamp in the window - windowSize).
 *
 * @param elements the elements currently held by the window
 * @param size the number of elements currently held by the window (not used by this method)
 * @param ctx the evictor context (not used by this method)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size, EvictorContext ctx) {
    // Only evict when hasTimestamp reports that the elements carry timestamps.
    if (hasTimestamp(elements)) {
        // Elements at or before this point in time are dropped.
        final long cutoff = getMaxTimestamp(elements) - windowSize;

        final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
        while (cursor.hasNext()) {
            final TimestampedValue<Object> candidate = cursor.next();
            if (candidate.getTimestamp() <= cutoff) {
                cursor.remove();
            }
        }
    }
}
// Reports whether the first element carries a timestamp; an empty window yields false.
private boolean hasTimestamp(Iterable<TimestampedValue<Object>> elements) {
    final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    return cursor.hasNext() && cursor.next().hasTimestamp();
}
// Returns the largest timestamp among the elements, or Long.MIN_VALUE when there are none.
private long getMaxTimestamp(Iterable<TimestampedValue<Object>> elements) {
    long latest = Long.MIN_VALUE;
    for (TimestampedValue<Object> candidate : elements) {
        final long timestamp = candidate.getTimestamp();
        if (timestamp > latest) {
            latest = timestamp;
        }
    }
    return latest;
}
导入相关依赖:
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-filesystem_2.11</artifactId>
<version>1.8.2</version>
</dependency>
</dependencies>
示例代码:
/**
  * Evictor that removes every window element whose value contains `keyWord`.
  *
  * @param keyWord        substring that marks an element for eviction
  * @param doEvictorAfter when `true` the eviction happens in `evictAfter`,
  *                       otherwise (the default) in `evictBefore`
  */
class KeyWordEvictor(keyWord: String, doEvictorAfter: Boolean = false) extends Evictor[String, TimeWindow] {

  /** Evicts matching elements, unless eviction is configured to run in `evictAfter`. */
  override def evictBefore(elements: lang.Iterable[TimestampedValue[String]], size: Int, window: TimeWindow, evictorContext: Evictor.EvictorContext): Unit =
    if (!doEvictorAfter) evict(elements, size, window, evictorContext)

  /** Evicts matching elements, but only when eviction is configured to run in `evictAfter`. */
  override def evictAfter(elements: lang.Iterable[TimestampedValue[String]], size: Int, window: TimeWindow, evictorContext: Evictor.EvictorContext): Unit =
    if (doEvictorAfter) evict(elements, size, window, evictorContext)

  /** Walks the window contents and removes each element whose value contains `keyWord`. */
  private def evict(elements: lang.Iterable[TimestampedValue[String]], size: Int, window: TimeWindow, evictorContext: Evictor.EvictorContext): Unit = {
    val cursor = elements.iterator()
    while (cursor.hasNext) {
      val matches = cursor.next().getValue.contains(keyWord)
      if (matches) cursor.remove()
    }
  }
}
/**
  * Trigger that fires the window as soon as an element containing `keyWord` arrives.
  *
  * Timer callbacks never fire the window; only a matching element does.
  *
  * @param keyWord substring whose presence in an element fires the window
  */
class KeyWordTrigger(keyWord: String) extends Trigger[String, TimeWindow] {

  /** Logs the element and fires the window when the element contains `keyWord`. */
  override def onElement(element: String, timestamp: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult = {
    println("onElement:" + element)
    if (element.contains(keyWord)) {
      // FIRE evaluates the window but keeps its contents; purging them would
      // require FIRE_AND_PURGE.
      TriggerResult.FIRE
    } else {
      TriggerResult.CONTINUE
    }
  }

  /** Processing-time timers never fire the window. */
  override def onProcessingTime(time: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult =
    TriggerResult.CONTINUE

  /** Event-time timers never fire the window. */
  override def onEventTime(time: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult =
    TriggerResult.CONTINUE

  override def canMerge: Boolean = true

  /**
    * No-op merge hook. This trigger keeps no per-window state, so there is nothing
    * to merge. The override is still needed because `canMerge` returns `true`:
    * the inherited `onMerge` throws `UnsupportedOperationException`, which would
    * fail the job as soon as a merging window assigner merged two windows.
    */
  override def onMerge(window: TimeWindow, ctx: Trigger.OnMergeContext): Unit = ()

  /** Called when the window is cleared; this trigger has no state to release and only logs. */
  override def clear(window: TimeWindow, ctx: Trigger.TriggerContext): Unit = {
    println("窗口被清除了")
  }
}
/**
  * Demo job: reads lines from a socket, groups them into 10-second tumbling
  * processing-time windows, fires a window when a line containing "end" arrives,
  * evicts the lines containing "end" before the window function runs, and prints
  * the window function's output.
  */
object FlinkWindowEvictor {
  def main(args: Array[String]): Unit = {
    // 1. Create the stream execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Build the pipeline: socket source -> all-window -> trigger -> evictor -> window function.
    val lines = env.socketTextStream("Flink", 9999)
    val windowed = lines.windowAll(TumblingProcessingTimeWindows.of(Time.seconds(10)))
    val joined = windowed
      .trigger(new KeyWordTrigger("end"))
      .evictor(new KeyWordEvictor(keyWord = "end", doEvictorAfter = false))
      .apply(new UserDefineAllWindowFunction)
    joined.print()

    // 3. Submit the job.
    env.execute("FlinkWindowTrigger")
  }
}
/** Window function that emits all elements of a window as one comma-separated string. */
class UserDefineAllWindowFunction extends AllWindowFunction[String, String, TimeWindow] {
  override def apply(window: TimeWindow, input: Iterable[String], out: Collector[String]): Unit = {
    val joined = input.mkString(",")
    out.collect(joined)
  }
}