
(1) Source table (MySQL)
CREATE TABLE `T_YYBZB_TGH_BANKINFO` (
  `id` int(8) DEFAULT NULL,
  `bank_id` int(8) DEFAULT NULL,
  `bank_name` text CHARACTER SET utf8,
  `source_date` text CHARACTER SET utf8
);
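To have something to sync on a first run, a couple of rows can be seeded into the source table. A minimal sketch, assuming the MySQL instance from db.conf below (192.168.6.102:3306, database source); the bank names are made-up sample values:

# seed two illustrative rows; values are sample data only
mysql -h 192.168.6.102 -P 3306 -u root -p123456 source -e "
INSERT INTO T_YYBZB_TGH_BANKINFO (id, bank_id, bank_name, source_date)
VALUES (1, 102, 'ICBC', '2021-12-14'), (2, 103, 'CCB', '2021-12-14');"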
(2) Target table (Hive)
CREATE TABLE `ods_T_YYBZB_TGH_BANKINFO_di` (
  `id` int COMMENT 'auto-increment id',
  `bank_id` int COMMENT 'bank id',
  `bank_name` string COMMENT 'bank name',
  `etl_time` string)
COMMENT 'bank code table'
PARTITIONED BY (`pt` string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ('field.delim'=',', 'serialization.format'=',')
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://hadoop102:8020/user/hive/warehouse/ods.db/ods_t_yybzb_tgh_bankinfo_di'
TBLPROPERTIES ('bucketing_version'='2', 'last_modified_by'='atguigu', 'last_modified_time'='1639465403', 'transient_lastDdlTime'='1639465403');
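Before wiring DataX to this table, it is worth confirming it exists with the comma delimiter and the pt partition shown above. A quick check, assuming the hive CLI is on the PATH as in the load script later on:

# verify delimiter, location, and partition column of the target table
hive -e "DESCRIBE FORMATTED ods.ods_T_YYBZB_TGH_BANKINFO_di;"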
Development approach 1: MySQL to Hive with a hand-written JSON job
1.1 Create the deployment directories
mkdir db_conf
mkdir json_conf
mkdir sh_conf
1.2 Write the database config file db.conf
mysql_username=root
mysql_password=123456
mysql_ip=192.168.6.102
mysql_port=3306
mysql_sid=source
hadoop102_ip=192.168.6.102
hadoop102_port=8020
1.3 Write the JSON job config file
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "connection": [
              {
                "jdbcUrl": ["jdbc:mysql://$ip:$port/$sid"],
                "querySql": ["select id,bank_id,bank_name from T_YYBZB_TGH_BANKINFO"]
              }
            ],
            "password": "$password",
            "username": "$username"
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://$hdfs_ip:$hdfs_port",
            "fileType": "text",
            "path": "/user/hive/warehouse/ods.db/ods_T_YYBZB_TGH_BANKINFO_di",
            "fileName": "ods_T_YYBZB_TGH_BANKINFO_di",
            "column": [
              {"name": "id", "type": "int"},
              {"name": "bank_id", "type": "int"},
              {"name": "bank_name", "type": "string"}
            ],
            "writeMode": "append",
            "fieldDelimiter": ",",
            "encoding": "utf-8"
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": "1"
      }
    }
  }
}
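DataX rejects a malformed job file at startup, so a quick syntax check can save a round trip. The $-placeholders sit inside quoted strings, so the file is still plain JSON, and Python's built-in json.tool (Python is already required by datax.py) can validate it:

# pretty-prints on success, reports line/column on a syntax error
python -m json.tool ../json_conf/bank_name.json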
1.4 Write the shell script
#!/bin/bash
# Path to the config file:
config_file=../db_conf/db.conf
# Source system name
src_system=mysql
# Target system name
tag_system=hadoop102
export in_username=`grep -w ${src_system}_username ${config_file} | awk -F '=' '{print $2}'`
export in_password=`grep -w ${src_system}_password ${config_file} | awk -F '=' '{print $2}'`
export in_ip=`grep -w ${src_system}_ip ${config_file} | awk -F '=' '{print $2}'`
export in_port=`grep -w ${src_system}_port ${config_file} | awk -F '=' '{print $2}'`
export in_sid=`grep -w ${src_system}_sid ${config_file} | awk -F '=' '{print $2}'`
export in_hdfs_ip=`grep -w ${tag_system}_ip ${config_file} | awk -F '=' '{print $2}'`
export in_hdfs_port=`grep -w ${tag_system}_port ${config_file} | awk -F '=' '{print $2}'`
pre_day=`date -d "-1 day" +%Y%m%d`
pre_day_mon=`date -d "-1 day" +%Y%m`
echo ${in_username}
echo ${in_password}
echo ${in_ip}
echo ${in_port}
echo ${in_sid}
echo ${in_hdfs_ip}
echo ${in_hdfs_port}
echo ${pre_day}
echo ${pre_day_mon}
# Full load: clear the target table before reloading
hive -e "truncate table ods.ods_T_YYBZB_TGH_BANKINFO_di;"
python ../../datax.py -p"-Dusername=$in_username -Dpassword=$in_password -Dip=$in_ip -Dport=$in_port -Dsid=$in_sid -Dhdfs_ip=$in_hdfs_ip -Dhdfs_port=$in_hdfs_port" ../json_conf/bank_name.json
1.5 Run the script
# Run the script in the background
nohup sh start.sh >> start.log 2>&1 &
# Follow the log
tail -f start.log
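To run the load unattended instead of by hand, a crontab entry is one option. A sketch assuming the script is deployed under /opt/module/datax/sh_conf (a hypothetical path; adjust to your layout):

# crontab -e: run the full load every day at 06:00
0 6 * * * cd /opt/module/datax/sh_conf && sh start.sh >> start.log 2>&1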
Development approach 2: Oracle to Hive on the datax-web UI
2.1 Create a project
Project Management → Create Project
2.2 Build a DataX task template
Task Management → DataX Task Template → Add
Routing strategy: round-robin
Blocking strategy: overwrite the previous schedule
Cron: run daily at 06:00
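datax-web is built on xxl-job, whose cron expressions follow the Quartz style with a leading seconds field, so "daily at 06:00" would presumably be entered as:

# Quartz-style cron (sec min hour day month weekday)
0 0 6 * * ?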
2.3 Configure the data sources
Data Source Management → Add
(1) Add a MySQL data source
Data source: mysql
Username: root
Password: 123456
jdbcUrl: jdbc:mysql://192.168.6.102:3306/source
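The same credentials can be verified from the shell before saving the data source:

# prints a row count if host, port, user, and database are all correct
mysql -h 192.168.6.102 -P 3306 -u root -p123456 -e "SELECT COUNT(*) FROM source.T_YYBZB_TGH_BANKINFO;"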
(2) Add a Hive data source
Data source: hive
Username: atguigu
Password: 123456
jdbcUrl: jdbc:hive2://192.168.6.102:10000/ods
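Likewise, beeline can confirm the HiveServer2 jdbcUrl and account before datax-web uses them:

# lists the ods tables if the connection succeeds
beeline -u "jdbc:hive2://192.168.6.102:10000/ods" -n atguigu -p 123456 -e "SHOW TABLES;"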
2.4 Build the task
Task Management → Task Build
(1) Build the Reader
(2) Build the Writer
Configure every Writer field to match the Hive CREATE TABLE statement above.
(3) Map the fields
(4) Build and select the template
Then click Next.
2.5 Manage the task
(1) View the task
Task Management → Task Management
(2) Test run
Execute once
(3) Check the logs