一、累积型快照事实表
- 累积型快照事实表通常也会使用分区表,但分区字段使用事实生命周期中最早的时间,即事实发生(创建)的时间。
- 总结精简版:
①查出老数据(dwd层)中哪些分区的数据,在今天发生了变化
②根据变化的分区日期,查询老数据 old
③查询当天新的数据 new
④old full join new(按 id 关联),新老交替:新数据有值取新值,否则保留老值
⑤将合并后的结果写入分区!
二、案例
-- Daily load of dwd_fact_order_info, partitioned by the order's creation date.
--
-- Steps:
--   1. old: re-read the existing dwd partitions whose date matches the creation
--      date of an order present in today's ods_order_info slice (earlier days only).
--   2. new: today's ods_order_info rows, enriched with the activity id and a
--      status -> operate_time map built from today's status log.
--   3. FULL OUTER JOIN old and new on id; for every column the new value wins
--      and the old value is kept when there is no new one.
--   4. Overwrite the partitions named by the computed dt column.
--
-- PARTITION (dt) has no static value, so the session must allow fully dynamic
-- partitioning (hive.exec.dynamic.partition.mode=nonstrict).
-- '$do_date' is substituted by the calling script before execution.
INSERT OVERWRITE TABLE dwd_fact_order_info PARTITION (dt)
SELECT
    COALESCE(new.id, old.id)                                       AS id,
    COALESCE(new.order_status, old.order_status)                   AS order_status,
    COALESCE(new.user_id, old.user_id)                             AS user_id,
    COALESCE(new.out_trade_no, old.out_trade_no)                   AS out_trade_no,
    -- Milestone timestamps: take today's log entry for the status code if there
    -- is one, otherwise keep what was already recorded.
    -- create_time additionally falls back to the create_time carried by today's
    -- ods_order_info row, so it is not left NULL when the order has neither a
    -- '1001' log entry today nor a matching old row.
    COALESCE(new.status_map['1001'], old.create_time, new.create_time) AS create_time,
    COALESCE(new.status_map['1002'], old.payment_time)             AS payment_time,
    COALESCE(new.status_map['1003'], old.cancel_time)              AS cancel_time,
    COALESCE(new.status_map['1004'], old.finish_time)              AS finish_time,
    COALESCE(new.status_map['1005'], old.refund_time)              AS refund_time,
    COALESCE(new.status_map['1006'], old.refund_finish_time)       AS refund_finish_time,
    COALESCE(new.province_id, old.province_id)                     AS province_id,
    COALESCE(new.activity_id, old.activity_id)                     AS activity_id,
    COALESCE(new.original_total_amount, old.original_total_amount) AS original_total_amount,
    COALESCE(new.benefit_reduce_amount, old.benefit_reduce_amount) AS benefit_reduce_amount,
    COALESCE(new.feight_fee, old.feight_fee)                       AS feight_fee,
    COALESCE(new.final_total_amount, old.final_total_amount)       AS final_total_amount,
    -- Dynamic partition key: must be the last column and must mirror the
    -- create_time expression above. The last-resort fallback keeps rows out of
    -- the NULL (default) partition.
    DATE_FORMAT(
        COALESCE(new.status_map['1001'], old.create_time, new.create_time),
        'yyyy-MM-dd'
    )                                                              AS dt
FROM (
    -- old: existing fact rows from every partition touched by today's slice.
    SELECT
        id,
        order_status,
        user_id,
        out_trade_no,
        create_time,
        payment_time,
        cancel_time,
        finish_time,
        refund_time,
        refund_finish_time,
        province_id,
        activity_id,
        original_total_amount,
        benefit_reduce_amount,
        feight_fee,
        final_total_amount
    FROM dwd_fact_order_info
    WHERE dt IN (
        -- Creation dates of orders in today's slice that were created on an
        -- earlier day; today's own partition is rebuilt from "new" alone.
        SELECT DATE_FORMAT(create_time, 'yyyy-MM-dd')
        FROM ods_order_info
        WHERE dt = '$do_date'
          AND DATE_FORMAT(create_time, 'yyyy-MM-dd') < '$do_date'
    )
) old
FULL OUTER JOIN (
    -- new: today's order rows plus activity id and per-order status map.
    SELECT
        order_info.id,
        order_info.order_status,
        order_info.user_id,
        order_info.out_trade_no,
        order_info.create_time,
        order_info.province_id,
        order_info.original_total_amount,
        order_info.benefit_reduce_amount,
        order_info.feight_fee,
        order_info.final_total_amount,
        activity.activity_id,
        status_log.status_map
    FROM (
        SELECT
            id,
            order_status,
            user_id,
            out_trade_no,
            create_time,
            province_id,
            original_total_amount,
            benefit_reduce_amount,
            feight_fee,
            final_total_amount
        FROM ods_order_info
        WHERE dt = '$do_date'
    ) order_info
    LEFT JOIN (
        SELECT
            order_id,
            activity_id
        FROM ods_activity_order
        WHERE dt = '$do_date'
    ) activity
        ON order_info.id = activity.order_id
    LEFT JOIN (
        -- One row per order: "status:time" pairs joined with ',' and parsed by
        -- str_to_map using its default delimiters (',' between pairs, ':'
        -- between key and value).
        -- NOTE(review): operate_time presumably contains ':' itself; this relies
        -- on the engine splitting each pair on the first ':' only -- confirm on
        -- the target engine or switch to an explicit key/value delimiter.
        SELECT
            order_id,
            STR_TO_MAP(
                CONCAT_WS(',', COLLECT_SET(CONCAT(order_status, ':', operate_time)))
            ) AS status_map
        FROM ods_order_status_log
        WHERE dt = '$do_date'
        GROUP BY order_id
    ) status_log
        ON order_info.id = status_log.order_id
) new
    ON old.id = new.id;