Datax 是阿里开源的数据同步工具, Datax-web 是开源的Datax界面操作工具, Datax-web由国内的开发者编写, 非常感谢!
Datax下载地址:
https://github.com/alibaba/DataX
Datax-web源码下载地址: https://github.com/WeiYe-Jing/datax-web (GitHub - WeiYe-Jing/datax-web: DataX集成可视化页面, 选择数据源即可一键生成数据同步任务, 支持RDBMS、Hive、HBase、ClickHouse、MongoDB等数据源, 批量创建RDBMS数据同步任务, 集成开源调度系统, 支持分布式、增量同步数据、实时查看运行日志、监控执行器资源、KILL运行进程、数据源信息加密等。)
第一步: 软件包下载及环境准备
1: 可在线或离线下载 Datax.tar.gz 软件包
2: 下载 Datax-web 源码。【重要】需要在本地用 IDEA 打开源码、修改环境变量后自行打包, 不能直接下载已经打包好的压缩包, 有坑 -_-.
3: 新建datax文件夹
datax, 根文件夹,
datax/admin, 存放 datax-admin
datax/executor 存放 datax-executor, datax.tar.gz
datax/mysql 存放数据库脚本
# Create the admin, executor and mysql working directories under datax/ in one call.
mkdir -p datax/admin datax/executor datax/mysql
目录截图:
4: Datax-web 用idea打开, 修改配置变量的参数名
4.1: 后端校验修改: 去除对 jobDesc (任务描述) 的校验, 并给它一个默认值, 否则后面的操作会出问题
4.2: 配置变量名修改
如下两个文件中的环境变量修改, 从docker-compose.yml中传入, 不改的话会有问题
1: 改为大写
2: .点改为下划线_
datax-admin application.yml 修改
修改之后的: 如果有QQ邮箱的也要改,在文件下面一点
datax-executor application.yml 修改
修改之后的: 注意: ip地址要改为参数名, 从docker-compose中指定
4.3: Datax-web 编译 打包, 记得maven 库地址修改, 改为国内的仓库,不然会很慢
在根 (root) 模块下执行 Maven 的 clean install 进行打包
打完包: 将build下的这个包上传至datax文件下:
上传后将datax-web-2.1.2.tar.gz解压 : tar -zxvf datax-web-2.1.2.tar.gz
目录如图所示:
解压后, 进入目录下,
将admin copy到 datax/admin文件夹中
将executor copy到 datax/executor 文件夹中
4.4: admin目录
Dockerfile 文件: 从Datax-web 官网获取删改的
# Image for the datax-admin service.
FROM java:8-jdk

# Set the default locale
ENV LANG=C.UTF-8
ENV DATA_WEB=2.1.2
ENV DATAX_VERSION=2.1.2_1
ENV DATAX_NAME=datax-admin

# Copy the wait script and the application archive into the image,
# then make the wait script executable.
ADD wait-for-it.sh /etc/init.d/
ADD ${DATAX_NAME}_${DATAX_VERSION}.tar.gz /opt/
RUN chmod +x /etc/init.d/wait-for-it.sh

# Point the container clock at the configured time zone.
ENV TimeZone=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TimeZone /etc/localtime && echo $TimeZone > /etc/timezone

# Build-time arguments; only DATA_PATH has a default.
ARG DATA_PATH=/opt/${DATAX_NAME}/data
ARG SERVER_PORT
ARG DB_USERNAME
ARG DB_PASSWORD
ARG DB_HOST
ARG DB_PORT
ARG DB_DATABASE
ARG MAIL_USERNAME
ARG MAIL_PASSWORD

# Export the build arguments as environment variables (one ENV instruction,
# continued with backslashes).
# NOTE(review): this listing appears truncated after "lib" -- the original text
# ended in an unbalanced "}" fused with the next listing, so the rest of the
# CLASSPATH value and any following instructions (e.g. the start command) are
# missing here. The ":." suffix on DATA_PATH also looks misplaced. Confirm both
# against the upstream datax-web Dockerfile.
ENV SERVER_PORT=${SERVER_PORT} \
    DB_USERNAME=${DB_USERNAME} \
    DB_PASSWORD=${DB_PASSWORD} \
    DB_HOST=${DB_HOST} \
    DB_PORT=${DB_PORT} \
    DB_DATABASE=${DB_DATABASE} \
    MAIL_USERNAME=${MAIL_USERNAME} \
    MAIL_PASSWORD=${MAIL_PASSWORD} \
    DATA_PATH=${DATA_PATH}:. \
    CLASSPATH=/opt/${DATAX_NAME}/lib

# ---- wait-for-it.sh (a separate bash file; this listing appears to start mid-file) ----
# Print the arguments to stderr unless WAITFORIT_QUIET is 1.
echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
#######################################
# Print the command-line help to stderr and terminate.
# Globals:   WAITFORIT_cmdname (read)
# Outputs:   usage text on stderr
# Returns:   does not return; exits with status 1
#######################################
usage()
{
    cat << USAGE >&2
Usage:
$WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args]
-h HOST | --host=HOST Host or IP under test
-p PORT | --port=PORT TCP port under test
Alternatively, you specify the host and port as host:port
-s | --strict Only execute subcommand if the test succeeds
-q | --quiet Don't output any status messages
-t TIMEOUT | --timeout=TIMEOUT
Timeout in seconds, zero for no timeout
-- COMMAND ARGS Execute command with args after the test finishes
USAGE
    exit 1
}

#######################################
# Poll WAITFORIT_HOST:WAITFORIT_PORT once per second until a probe succeeds.
# Globals:   WAITFORIT_TIMEOUT, WAITFORIT_HOST, WAITFORIT_PORT,
#            WAITFORIT_ISBUSY, WAITFORIT_cmdname (read);
#            WAITFORIT_start_ts, WAITFORIT_end_ts, WAITFORIT_result (written)
# Outputs:   progress messages via echoerr
# Returns:   status of the last probe (0 once the port answered)
#######################################
wait_for()
{
    if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
        echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
    else
        echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
    fi
    WAITFORIT_start_ts=$(date +%s)
    while :
    do
        if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then
            # busybox environment: probe with nc
            nc -z "$WAITFORIT_HOST" "$WAITFORIT_PORT"
            WAITFORIT_result=$?
        else
            # otherwise probe through bash's /dev/tcp redirection, in a subshell
            (echo > "/dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT") >/dev/null 2>&1
            WAITFORIT_result=$?
        fi
        if [[ $WAITFORIT_result -eq 0 ]]; then
            WAITFORIT_end_ts=$(date +%s)
            echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds"
            break
        fi
        sleep 1
    done
    return "$WAITFORIT_result"
}

#######################################
# Re-run this script as a --child under `timeout`, in the background, and wait
# for it, so that the wait is bounded by WAITFORIT_TIMEOUT seconds.
# Globals:   WAITFORIT_QUIET, WAITFORIT_BUSYTIMEFLAG, WAITFORIT_TIMEOUT,
#            WAITFORIT_HOST, WAITFORIT_PORT, WAITFORIT_cmdname (read);
#            WAITFORIT_PID, WAITFORIT_RESULT (written)
# Returns:   exit status of the background `timeout` job
#######################################
wait_for_wrapper()
{
    # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
    # WAITFORIT_BUSYTIMEFLAG is deliberately unquoted: it may be empty and must then vanish.
    if [[ $WAITFORIT_QUIET -eq 1 ]]; then
        timeout $WAITFORIT_BUSYTIMEFLAG "$WAITFORIT_TIMEOUT" "$0" --quiet --child "--host=$WAITFORIT_HOST" "--port=$WAITFORIT_PORT" "--timeout=$WAITFORIT_TIMEOUT" &
    else
        timeout $WAITFORIT_BUSYTIMEFLAG "$WAITFORIT_TIMEOUT" "$0" --child "--host=$WAITFORIT_HOST" "--port=$WAITFORIT_PORT" "--timeout=$WAITFORIT_TIMEOUT" &
    fi
    WAITFORIT_PID=$!
    # Forward Ctrl-C to the background job.
    trap "kill -INT -$WAITFORIT_PID" INT
    wait "$WAITFORIT_PID"
    WAITFORIT_RESULT=$?
    if [[ $WAITFORIT_RESULT -ne 0 ]]; then
        echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
    fi
    return "$WAITFORIT_RESULT"
}

# process arguments
# Nothing in this listing assigns WAITFORIT_cmdname, although every message
# uses it; default it to the script's basename when it is not already set.
WAITFORIT_cmdname=${WAITFORIT_cmdname:-${0##*/}}

# Parse the command line into the WAITFORIT_* settings.
while [[ $# -gt 0 ]]
do
    case "$1" in
        *:* )
        # host:port as a single argument; word-splitting on the replaced ':' is intentional
        WAITFORIT_hostport=(${1//:/ })
        WAITFORIT_HOST=${WAITFORIT_hostport[0]}
        WAITFORIT_PORT=${WAITFORIT_hostport[1]}
        shift 1
        ;;
        --child)
        WAITFORIT_CHILD=1
        shift 1
        ;;
        -q | --quiet)
        WAITFORIT_QUIET=1
        shift 1
        ;;
        -s | --strict)
        WAITFORIT_STRICT=1
        shift 1
        ;;
        -h)
        WAITFORIT_HOST="$2"
        if [[ $WAITFORIT_HOST == "" ]]; then break; fi
        shift 2
        ;;
        --host=*)
        WAITFORIT_HOST="${1#*=}"
        shift 1
        ;;
        -p)
        WAITFORIT_PORT="$2"
        if [[ $WAITFORIT_PORT == "" ]]; then break; fi
        shift 2
        ;;
        --port=*)
        WAITFORIT_PORT="${1#*=}"
        shift 1
        ;;
        -t)
        WAITFORIT_TIMEOUT="$2"
        if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi
        shift 2
        ;;
        --timeout=*)
        WAITFORIT_TIMEOUT="${1#*=}"
        shift 1
        ;;
        --)
        # Everything after "--" is the command to exec once the wait is over.
        shift
        WAITFORIT_CLI=("$@")
        break
        ;;
        --help)
        usage
        ;;
        *)
        echoerr "Unknown argument: $1"
        usage
        ;;
    esac
done

# Both a host and a port are mandatory.
if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then
    echoerr "Error: you need to provide a host and port to test."
    usage
fi

# Defaults for anything not given on the command line.
WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15}
WAITFORIT_STRICT=${WAITFORIT_STRICT:-0}
WAITFORIT_CHILD=${WAITFORIT_CHILD:-0}
WAITFORIT_QUIET=${WAITFORIT_QUIET:-0}

# Check to see if timeout is from busybox?
WAITFORIT_TIMEOUT_PATH=$(type -p timeout)
WAITFORIT_TIMEOUT_PATH=$(realpath "$WAITFORIT_TIMEOUT_PATH" 2>/dev/null || readlink -f "$WAITFORIT_TIMEOUT_PATH")

WAITFORIT_BUSYTIMEFLAG=""
if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then
    WAITFORIT_ISBUSY=1
    # Check if busybox timeout uses -t flag
    # (recent Alpine versions don't support -t anymore)
    if timeout &>/dev/stdout | grep -q -e '-t '; then
        WAITFORIT_BUSYTIMEFLAG="-t"
    fi
else
    WAITFORIT_ISBUSY=0
fi

# A --child invocation only polls and exits; the parent either wraps the poll
# in `timeout` (timeout > 0) or polls directly without a limit.
if [[ $WAITFORIT_CHILD -gt 0 ]]; then
    wait_for
    WAITFORIT_RESULT=$?
    exit "$WAITFORIT_RESULT"
else
    if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
        wait_for_wrapper
        WAITFORIT_RESULT=$?
    else
        wait_for
        WAITFORIT_RESULT=$?
    fi
fi

# Hand over to the command given after "--", unless strict mode forbids it
# because the wait failed; with no command, exit with the wait result.
if [[ $WAITFORIT_CLI != "" ]]; then
    if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then
        echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess"
        exit "$WAITFORIT_RESULT"
    fi
    exec "${WAITFORIT_CLI[@]}"
else
    exit "$WAITFORIT_RESULT"
fi
4.5: executor 目录:
Dockerfile 文件: 从Datax-web 官网获取删改的
# Image for the datax-executor service.
FROM java:8-jdk

# Set the default locale
ENV LANG=C.UTF-8
ENV DATA_WEB=2.1.2
ENV DATAX_VERSION=2.1.2_1
ENV DATAX_NAME=datax-executor

# Copy the executor archive, the wait script and the DataX archive into the
# image, then make the wait script executable.
ADD ${DATAX_NAME}_${DATAX_VERSION}.tar.gz /opt/
ADD wait-for-it.sh /etc/init.d/
ADD datax.tar.gz /opt/
RUN chmod +x /etc/init.d/wait-for-it.sh

# Point the container clock at the configured time zone.
ENV TimeZone=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TimeZone /etc/localtime && echo $TimeZone > /etc/timezone

# Build-time arguments; the last three carry defaults.
ARG SERVER_PORT
ARG DATAX_ADMIN_HOST
ARG DATAX_ADMIN_PORT
ARG EXECUTOR_PORT
ARG DATA_PATH
ARG PYTHON_PATH=/opt/datax/bin/datax.py
ARG SERVICE_LOG_PATH=/opt/${DATAX_NAME}/logs
ARG JSON_PATH=/opt/${DATAX_NAME}/json

# Export the build arguments as environment variables (one ENV instruction,
# continued with backslashes).
# NOTE(review): this listing appears truncated after "lib" -- the original text
# ended in an unbalanced "}" fused with the next listing, so the rest of the
# CLASSPATH value and any following instructions are missing here. DATA_PATH,
# PYTHON_PATH, SERVICE_LOG_PATH and JSON_PATH are declared above but not
# exported in the visible part. Confirm against the upstream datax-web Dockerfile.
ENV SERVER_PORT=${SERVER_PORT} \
    DATAX_ADMIN_HOST=${DATAX_ADMIN_HOST} \
    DATAX_ADMIN_PORT=${DATAX_ADMIN_PORT} \
    EXECUTOR_PORT=${EXECUTOR_PORT} \
    CLASSPATH=/opt/${DATAX_NAME}/lib

# ---- wait-for-it.sh (a separate bash file; this listing appears to start mid-file) ----
# Print the arguments to stderr unless WAITFORIT_QUIET is 1.
echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
#######################################
# Print the command-line help to stderr and terminate.
# Globals:   WAITFORIT_cmdname (read)
# Outputs:   usage text on stderr
# Returns:   does not return; exits with status 1
#######################################
usage()
{
    cat << USAGE >&2
Usage:
$WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args]
-h HOST | --host=HOST Host or IP under test
-p PORT | --port=PORT TCP port under test
Alternatively, you specify the host and port as host:port
-s | --strict Only execute subcommand if the test succeeds
-q | --quiet Don't output any status messages
-t TIMEOUT | --timeout=TIMEOUT
Timeout in seconds, zero for no timeout
-- COMMAND ARGS Execute command with args after the test finishes
USAGE
    exit 1
}

#######################################
# Poll WAITFORIT_HOST:WAITFORIT_PORT once per second until a probe succeeds.
# Globals:   WAITFORIT_TIMEOUT, WAITFORIT_HOST, WAITFORIT_PORT,
#            WAITFORIT_ISBUSY, WAITFORIT_cmdname (read);
#            WAITFORIT_start_ts, WAITFORIT_end_ts, WAITFORIT_result (written)
# Outputs:   progress messages via echoerr
# Returns:   status of the last probe (0 once the port answered)
#######################################
wait_for()
{
    if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
        echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
    else
        echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
    fi
    WAITFORIT_start_ts=$(date +%s)
    while :
    do
        if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then
            # busybox environment: probe with nc
            nc -z "$WAITFORIT_HOST" "$WAITFORIT_PORT"
            WAITFORIT_result=$?
        else
            # otherwise probe through bash's /dev/tcp redirection, in a subshell
            (echo > "/dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT") >/dev/null 2>&1
            WAITFORIT_result=$?
        fi
        if [[ $WAITFORIT_result -eq 0 ]]; then
            WAITFORIT_end_ts=$(date +%s)
            echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds"
            break
        fi
        sleep 1
    done
    return "$WAITFORIT_result"
}

#######################################
# Re-run this script as a --child under `timeout`, in the background, and wait
# for it, so that the wait is bounded by WAITFORIT_TIMEOUT seconds.
# Globals:   WAITFORIT_QUIET, WAITFORIT_BUSYTIMEFLAG, WAITFORIT_TIMEOUT,
#            WAITFORIT_HOST, WAITFORIT_PORT, WAITFORIT_cmdname (read);
#            WAITFORIT_PID, WAITFORIT_RESULT (written)
# Returns:   exit status of the background `timeout` job
#######################################
wait_for_wrapper()
{
    # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
    # WAITFORIT_BUSYTIMEFLAG is deliberately unquoted: it may be empty and must then vanish.
    if [[ $WAITFORIT_QUIET -eq 1 ]]; then
        timeout $WAITFORIT_BUSYTIMEFLAG "$WAITFORIT_TIMEOUT" "$0" --quiet --child "--host=$WAITFORIT_HOST" "--port=$WAITFORIT_PORT" "--timeout=$WAITFORIT_TIMEOUT" &
    else
        timeout $WAITFORIT_BUSYTIMEFLAG "$WAITFORIT_TIMEOUT" "$0" --child "--host=$WAITFORIT_HOST" "--port=$WAITFORIT_PORT" "--timeout=$WAITFORIT_TIMEOUT" &
    fi
    WAITFORIT_PID=$!
    # Forward Ctrl-C to the background job.
    trap "kill -INT -$WAITFORIT_PID" INT
    wait "$WAITFORIT_PID"
    WAITFORIT_RESULT=$?
    if [[ $WAITFORIT_RESULT -ne 0 ]]; then
        echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
    fi
    return "$WAITFORIT_RESULT"
}

# process arguments
# Nothing in this listing assigns WAITFORIT_cmdname, although every message
# uses it; default it to the script's basename when it is not already set.
WAITFORIT_cmdname=${WAITFORIT_cmdname:-${0##*/}}

# Parse the command line into the WAITFORIT_* settings.
while [[ $# -gt 0 ]]
do
    case "$1" in
        *:* )
        # host:port as a single argument; word-splitting on the replaced ':' is intentional
        WAITFORIT_hostport=(${1//:/ })
        WAITFORIT_HOST=${WAITFORIT_hostport[0]}
        WAITFORIT_PORT=${WAITFORIT_hostport[1]}
        shift 1
        ;;
        --child)
        WAITFORIT_CHILD=1
        shift 1
        ;;
        -q | --quiet)
        WAITFORIT_QUIET=1
        shift 1
        ;;
        -s | --strict)
        WAITFORIT_STRICT=1
        shift 1
        ;;
        -h)
        WAITFORIT_HOST="$2"
        if [[ $WAITFORIT_HOST == "" ]]; then break; fi
        shift 2
        ;;
        --host=*)
        WAITFORIT_HOST="${1#*=}"
        shift 1
        ;;
        -p)
        WAITFORIT_PORT="$2"
        if [[ $WAITFORIT_PORT == "" ]]; then break; fi
        shift 2
        ;;
        --port=*)
        WAITFORIT_PORT="${1#*=}"
        shift 1
        ;;
        -t)
        WAITFORIT_TIMEOUT="$2"
        if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi
        shift 2
        ;;
        --timeout=*)
        WAITFORIT_TIMEOUT="${1#*=}"
        shift 1
        ;;
        --)
        # Everything after "--" is the command to exec once the wait is over.
        shift
        WAITFORIT_CLI=("$@")
        break
        ;;
        --help)
        usage
        ;;
        *)
        echoerr "Unknown argument: $1"
        usage
        ;;
    esac
done

# Both a host and a port are mandatory.
if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then
    echoerr "Error: you need to provide a host and port to test."
    usage
fi

# Defaults for anything not given on the command line.
WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15}
WAITFORIT_STRICT=${WAITFORIT_STRICT:-0}
WAITFORIT_CHILD=${WAITFORIT_CHILD:-0}
WAITFORIT_QUIET=${WAITFORIT_QUIET:-0}

# Check to see if timeout is from busybox?
WAITFORIT_TIMEOUT_PATH=$(type -p timeout)
WAITFORIT_TIMEOUT_PATH=$(realpath "$WAITFORIT_TIMEOUT_PATH" 2>/dev/null || readlink -f "$WAITFORIT_TIMEOUT_PATH")

WAITFORIT_BUSYTIMEFLAG=""
if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then
    WAITFORIT_ISBUSY=1
    # Check if busybox timeout uses -t flag
    # (recent Alpine versions don't support -t anymore)
    if timeout &>/dev/stdout | grep -q -e '-t '; then
        WAITFORIT_BUSYTIMEFLAG="-t"
    fi
else
    WAITFORIT_ISBUSY=0
fi

# A --child invocation only polls and exits; the parent either wraps the poll
# in `timeout` (timeout > 0) or polls directly without a limit.
if [[ $WAITFORIT_CHILD -gt 0 ]]; then
    wait_for
    WAITFORIT_RESULT=$?
    exit "$WAITFORIT_RESULT"
else
    if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
        wait_for_wrapper
        WAITFORIT_RESULT=$?
    else
        wait_for
        WAITFORIT_RESULT=$?
    fi
fi

# Hand over to the command given after "--", unless strict mode forbids it
# because the wait failed; with no command, exit with the wait result.
if [[ $WAITFORIT_CLI != "" ]]; then
    if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then
        echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess"
        exit "$WAITFORIT_RESULT"
    fi
    exec "${WAITFORIT_CLI[@]}"
else
    exit "$WAITFORIT_RESULT"
fi
4.6: mysql 目录
1:进入解压的 datax-web-2.1.2文件夹/bin目录下
cd datax-web-2.1.2/bin
2:复制 db文件夹到 mysql文件夹
cp -R db/ /software/datax/mysql/
mysql目录及文件显示:
4.6: docker-compose 文件内容: 从 Datax-web官网获取删改:
# Compose stack: MySQL plus the locally built datax-admin and datax-executor images.
version: '3.7'
services:
  mysql:
    image: mysql:5.7
    restart: always
    environment:
      MYSQL_ROOT_PASSWORD: root95271@2022!
      #MYSQL_USER: root
      MYSQL_DATABASE: dataxweb
      TZ: Asia/Shanghai
      # NOTE(review): these two look like mysqld options rather than environment
      # variables read by the image; if they are meant to take effect they
      # probably belong in `command:` -- confirm against the mysql image docs.
      character-set-server: utf8mb4
      collation-server: utf8mb4_unicode_ci
    ports:
      - "3307:3306"
    volumes:
      - ./mysql/data:/var/lib/mysql
      - ./mysql/conf:/etc/mysql/conf.d
      - ./mysql/logs:/logs
      # SQL scripts copied into ./mysql/db are mounted as init scripts
      - ./mysql/db:/docker-entrypoint-initdb.d/
    container_name: mysql

  datax_admin:
    # Local image, built in the next step (docker build -t datax-admin .)
    image: datax-admin
    restart: always
    ports:
      - "9527:9527"
    environment:
      SERVER_PORT: 9527
      DB_USERNAME: root
      DB_PASSWORD: root95271@2022!
      DB_HOST: mysql
      DB_PORT: 3306
      DB_DATABASE: dataxweb
      MAIL_USERNAME: email_name
      MAIL_PASSWORD: email_password
    depends_on:
      - mysql
    container_name: admin

  datax_executor:
    # Local image, built in the next step (docker build -t datax-executor .)
    image: datax-executor
    restart: always
    volumes:
      - ./datax_web/executor/python:/opt/datax-executor/python
      - ./datax_web/executor/json:/opt/datax-executor/json
      - ./datax_web/executor/data:/opt/datax-executor/data
    ports:
      - "8085:8081"
    environment:
      SERVER_PORT: 8081
      DATAX_ADMIN_HOST: admin
      DATAX_ADMIN_PORT: 9527
      EXECUTOR_PORT: 9999
      DATA_PATH: ./data
    # Wait until the admin service accepts connections before starting the
    # executor. The host:port below must match the admin container_name and its
    # SERVER_PORT ("admin" and 9527 in this file); update it if either changes.
    entrypoint: "/etc/init.d/wait-for-it.sh admin:9527 -- java com.wugui.datax.executor.DataXExecutorApplication"
    depends_on:
      - mysql
      - datax_admin
    container_name: executor
4.7:镜像制作及 docker-compose 启动服务及验证
admin 镜像:
1: cd datax/admin
2: docker build -t datax-admin .
成功截图:
executor: 镜像:
1: cd datax/executor
2: docker build -t datax-executor .
成功截图:
docker-compose 启动服务:
在datax目录下执行:
docker-compose up -d
成功截图:
mysql启动确认:
docker logs -f mysql
admin启动确认:
docker logs -f admin
executor 启动确认:
docker logs -f executor
成功截图:
界面访问确认:
至此就完成了, Datax-web步骤:
1: 创建数据源,
2:创建Datax任务模版
3: 构建任务
4: 执行任务
5: 查看执行任务日志。



