一、get_json_object
直接看 SQL 示例：下面在 Impala 和 Hive 中分别执行同一条查询，对比两者的返回结果。
impala> select
> get_json_object(json_str, '$.*') as res1, --获取全部属性值
> get_json_object(json_str, '$.a') as res2, --获取a属性值
> get_json_object(json_str, '$.c.d') as res3, --嵌套:获取c属性的d属性值
> get_json_object(json_str, '$.b[1]') as res4, --获取b(JsonArray)的第2个元素
> get_json_object(json_str, '$.b[*]') as res5, --获取b(JsonArray)的全部元素
> get_json_object(json_str, '$.x.y') as res6 --获取不存在的属性值
> from
> (
> select '{"a":1,"b":[2,3,4],"c":{"d":5}}' as json_str
> ) t;
+---------------------+------+------+------+---------+------+
| res1 | res2 | res3 | res4 | res5 | res6 |
+---------------------+------+------+------+---------+------+
| [1,[2,3,4],{"d":5}] | 1 | 5 | 3 | [2,3,4] | NULL |
+---------------------+------+------+------+---------+------+
hive> select
> get_json_object(json_str, '$.*') as res1, --获取全部属性值
> get_json_object(json_str, '$.a') as res2, --获取a属性值
> get_json_object(json_str, '$.c.d') as res3, --嵌套:获取c属性的d属性值
> get_json_object(json_str, '$.b[1]') as res4, --获取b(JsonArray)的第2个元素
> get_json_object(json_str, '$.b[*]') as res5, --获取b(JsonArray)的全部元素
> get_json_object(json_str, '$.x.y') as res6 --获取不存在的属性值
> from
> (
> select '{"a":1,"b":[2,3,4],"c":{"d":5}}' as json_str
> ) t;
+------+------+------+------+---------+------+
| res1 | res2 | res3 | res4 | res5 | res6 |
+------+------+------+------+---------+------+
| NULL | 1 | 5 | 3 | [2,3,4] | NULL |
+------+------+------+------+---------+------+
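对比两段输出可以看到：同样的 $.* 写法，Impala 返回了全部属性值，而 Hive 返回 NULL。Hive 的 get_json_object 只实现了 JSONPath 的一个子集，按官方文档的说明，* 只能作为 [] 的通配符使用（例如上面的 $.b[*]），不能用来匹配对象的全部属性。初步怀疑 Hive 是将 * 当成了属性的名字去解析，具体原因有空再看源码确认。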
二、json_tuple
hive> select
> json_tuple(json_str, 'a', 'c', 'b')
> from
> (
> select '{"a":1,"b":[2,3,4],"c":{"d":5}}' as json_str
> ) t;
+-----+----------+----------+
| c0 | c1 | c2 |
+-----+----------+----------+
| 1 | {"d":5} | [2,3,4] |
+-----+----------+----------+
hive> select
> t2.col1,
> t2.col2,
> get_json_object(t2.col2, '$[0]'),
> t2.col3,
> get_json_object(t2.col3, '$.d')
> from
> (
> select '{"a":1,"b":[2,3,4],"c":{"d":5}}' as json_str
> ) t1
> lateral view json_tuple(json_str,'a', 'b', 'c') t2 as col1, col2, col3;
+----------+----------+------+----------+------+
| t2.col1 | t2.col2 | _c2 | t2.col3 | _c4 |
+----------+----------+------+----------+------+
| 1 | [2,3,4] | 2 | {"d":5} | 5 |
+----------+----------+------+----------+------+