HDFS文件路径: /users/userfriends/user_friends.csv
csv文件数据如下:
创建外部表(ods)
去除首行,字段间按照“,”分割。
create external table if not exists dwd_user_friends (
userid string,
friends string
)
row format delimited fields terminated by ',' //字段按 ',' 分割
location '/users/userfriends'
tblproperties ("skip.header.line.count=1") //跳过第一行数据
====================数据如下===========================
userid friends
3197468391 1346449342 3873244116 4226080662 1222907620 547730952 1052032722
3537982273 1491560444 395798035 2036380346 899375619 3534826887 3427911581
创建侧视图
实现一行数据记录一个user及其一个firend。
语法:lateral view explode
//语法
select userid,friend
from (select * from ods_user_friends where friends != '') e
lateral view explode (split(friends,' ')) f as friend
========================数据如下=================================
userid friend
3197468391 1346449342
3197468391 3873244116
3197468391 4226080662
3197468391 1222907620
3197468391 547730952
3197468391 1052032722
创建内部表(dwd)
语法:create table t1 as select […] from t2
create table dwd_user_friends as
select userid,friend from (select * from ods_user_friends where friends != '') e
lateral view explode (split(friends,' ')) f as friend ;
============================数据如下============================================
userid friend
3197468391 1346449342
3197468391 3873244116
3197468391 4226080662
3197468391 1222907620
3197468391 547730952
3197468391 1052032722
建表常用关键语句:
1.指定字段间的分割符号,防止字段内存在“,”而分割字段内容。
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties (
'separatorChar' = ',', //字段指定字段分隔符
'quoteChar' = '\"', //字段内容引用字符
'escapeChar' = '\\' //转义字符
)
2.跳过第一行数据
tblproperties ("skip.header.line.count=1") //写在建表语句最后一行