1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
| --建表 DROP TABLE IF EXISTS dwd_display_log; CREATE EXTERNAL TABLE dwd_display_log( `area_code` STRING COMMENT '地区编码', `brand` STRING COMMENT '手机品牌', `channel` STRING COMMENT '渠道', `is_new` STRING COMMENT '是否首次启动', `model` STRING COMMENT '手机型号', `mid_id` STRING COMMENT '设备id', `os` STRING COMMENT '操作系统', `user_id` STRING COMMENT '会员id', `version_code` STRING COMMENT 'app版本号', `during_time` BIGINT COMMENT 'app版本号', `page_item` STRING COMMENT '目标id ', `page_item_type` STRING COMMENT '目标类型', `last_page_id` STRING COMMENT '上页类型', `page_id` STRING COMMENT '页面ID ', `source_type` STRING COMMENT '来源类型', `ts` BIGINT COMMENT 'app版本号', `display_type` STRING COMMENT '曝光类型', `item` STRING COMMENT '曝光对象id ', `item_type` STRING COMMENT 'app版本号', `order` BIGINT COMMENT '曝光顺序', `pos_id` BIGINT COMMENT '曝光位置' ) COMMENT '曝光日志表' PARTITIONED BY (`dt` STRING) STORED AS PARQUET LOCATION '/warehouse/gmall/dwd/dwd_display_log' TBLPROPERTIES('parquet.compression'='lzo'); --加载数据 insert overwrite table dwd_display_log partition(dt='2020-06-14') select get_json_object(line,'$.common.ar'), get_json_object(line,'$.common.ba'), get_json_object(line,'$.common.ch'), get_json_object(line,'$.common.is_new'), get_json_object(line,'$.common.md'), get_json_object(line,'$.common.mid'), get_json_object(line,'$.common.os'), get_json_object(line,'$.common.uid'), get_json_object(line,'$.common.vc'), get_json_object(line,'$.page.during_time'), get_json_object(line,'$.page.item'), get_json_object(line,'$.page.item_type'), get_json_object(line,'$.page.last_page_id'), get_json_object(line,'$.page.page_id'), get_json_object(line,'$.page.source_type'), get_json_object(line,'$.ts'), get_json_object(display,'$.display_type'), get_json_object(display,'$.item'), get_json_object(display,'$.item_type'), get_json_object(display,'$.order'), get_json_object(display,'$.pos_id') from ods_log lateral view explode_json_array(get_json_object(line,'$.displays')) tmp as display where dt='2020-06-14' and get_json_object(line,'$.displays') is not null;
|