Hive UDF:
-- ETL step: prepare spv data.
-- The stored c_ip value is the converted form, left-padded with 0.
-- Everything below runs against the mwd database.
use mwd;

-- Session-scoped UDF registrations used by the load statement.
-- The implementing classes are not defined in this script and no add jar
-- statement is visible here, so presumably the jar is made available to the
-- session elsewhere (TODO confirm).
create temporary function uri_to_map
    as 'com.mwd.hive.udf.GenericUDFUriToMap';
create temporary function parse_agent
    as 'com.mwd.hive.udf.HdfsRegexParseUdf';
create temporary function row_sequence
    as 'com.mwd.hive.udf.RowSequence';
-- Append rows to partition import_time=20131111 of wdlog_ready_spv, reading
-- rows of wdlog with the same import_time value and skipping rows whose
-- cs_uri_query is null.
-- The select list is positional and has to line up with the columns of
-- wdlog_ready_spv, whose definition is not visible in this script:
--   1. result of row_sequence() (presumably a generated row id, TODO confirm)
--   2. cur_date
--   3. cur_date and cur_time joined by a single space
--   4. cs_uri_query, unchanged
--   5. c_ip split on the dot, with the first four parts each left-padded
--      with 0 to width 3 and concatenated, e.g. 10.1.2.3 becomes 010001002003
--   6. agent, unchanged
--   7. uri_to_map over cs_uri_query. An instr test picks the pair delimiter:
--      the escaped ampersand form when it occurs in the query, otherwise a
--      plain ampersand. The meaning of the remaining arguments (the equals
--      sign and the two /user paths) is defined by the UDF, NOTE(review):
--      confirm against its implementation
--   8. parse_agent over agent with /user/reg_data (presumably a location of
--      parsing rules, TODO confirm)
insert into table wdlog_ready_spv partition (import_time = '20131111')
select
    row_sequence(),
    cur_date,
    concat_ws(' ', cur_date, cur_time),
    cs_uri_query,
    printf(
        '%s%s%s%s',
        lpad(split(c_ip, '\\\.')[0], 3, '0'),
        lpad(split(c_ip, '\\\.')[1], 3, '0'),
        lpad(split(c_ip, '\\\.')[2], 3, '0'),
        lpad(split(c_ip, '\\\.')[3], 3, '0')
    ),
    agent,
    if(
        instr(cs_uri_query, '&\\;') > 0,
        uri_to_map(cs_uri_query, '&\\;', '=', '/user/searchdata', '/user/mediadata'),
        uri_to_map(cs_uri_query, '&', '=', '/user/searchdata', '/user/mediadata')
    ),
    parse_agent(agent, '/user/reg_data')
from wdlog
where import_time = '20131111'
    and cs_uri_query is not null;
-- Cleanup: remove the temporary functions row_sequence, uri_to_map and
-- parse_agent from the current session once the load has run.
drop temporary function row_sequence;
drop temporary function uri_to_map;
drop temporary function parse_agent;
本站文章为和通数据库网友分享或者投稿,欢迎任何形式的转载,但请务必注明出处。
同时,文章内容如侵犯了您的权益,请联系QQ:970679559,我们会尽快处理。