# 安装docker
yum -y install docker
# 安装docker-compose
curl -L "https://github.com/docker/compose/releases/download/1.27.4/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
chmod +x /usr/local/bin/docker-compose
# 启动docker
systemctl start docker
# 检查docker是否启动成功,如果命令成功运行,则启动成功
docker ps

2.安装python3(建议使用3.8.1及以上版本,否则可能导致datahub启动失败)
# 安装依赖
yum install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel readline-devel tk-devel gcc make libffi-devel
# 安装wget
yum install wget
# 下载python3源码包
wget https://www.python.org/ftp/python/3.8.1/Python-3.8.1.tgz
# 解压压缩包
tar -zxvf Python-3.8.1.tgz
# 进入文件夹
cd Python-3.8.1
# 配置安装位置
./configure prefix=/usr/local/python3
# 安装
make && make install
# 添加python3的软链接
ln -s /usr/local/python3/bin/python3.8 /usr/bin/python3
# 添加 pip3 的软链接
ln -s /usr/local/python3/bin/pip3.8 /usr/bin/pip3
# 验证是否安装成功,查看python3的版本
python3 -V

3.安装datahub
python3 -m pip install --upgrade pip wheel setuptools
python3 -m pip uninstall datahub acryl-datahub || true  # sanity check - ok if it fails
python3 -m pip install --upgrade acryl-datahub
python3 -m datahub version

4.启动datahub
python3 -m datahub docker quickstart

5.ingest schema

5.1检查是否安装数据库插件
先运行以下命令,查看对应类型的数据库插件是否已安装:
python3 -m datahub check plugins
我们以“mysql”为例:如果输出中mysql一项后面没有标注disabled,说明该插件已经安装,可以直接进行提取schema的操作。如果该插件尚未安装,需要先运行以下命令安装mysql插件:
pip install 'acryl-datahub[mysql]'
5.2准备recipe文件
vi mysql_test.yaml
source:
  type: mysql
  config:
    username: root
    password: root
    database: testdb
    host_port: xxx:3306
sink:
  type: datahub-rest
  config:
    server: http://localhost:8080
5.3提取schema信息
python3 -m datahub ingest -c mysql_test.yaml

6.如何查看datahub存储的数据
快速启动时,docker会自动启动mysql和elasticsearch容器用于存储datahub收集的schema数据。如果需要查看datahub存储内容,可以进入容器中查看。
# 展示docker列表
[root@HW0007740 ~]# docker ps
CONTAINER ID   IMAGE                                   COMMAND                  CREATED      STATUS                 PORTS                                                                                      NAMES
534a8f60cf50   linkedin/datahub-frontend-react:head    "datahub-frontend/bi…"   3 days ago   Up 3 days (healthy)    0.0.0.0:9002->9002/tcp, :::9002->9002/tcp                                                  datahub-frontend-react
61b883008e25   confluentinc/cp-schema-registry:5.4.0   "/etc/confluent/dock…"   4 days ago   Up 3 days              0.0.0.0:8081->8081/tcp, :::8081->8081/tcp                                                  schema-registry
79017e4203d5   confluentinc/cp-kafka:5.4.0             "/etc/confluent/dock…"   4 days ago   Up 3 days              0.0.0.0:9092->9092/tcp, :::9092->9092/tcp, 0.0.0.0:29092->29092/tcp, :::29092->29092/tcp   broker
7305e4049bc7   linkedin/datahub-gms:head               "/bin/sh -c /datahub…"   4 days ago   Up 3 days (healthy)    0.0.0.0:8080->8080/tcp, :::8080->8080/tcp                                                  datahub-gms
9834c7f6dccb   confluentinc/cp-zookeeper:5.4.0         "/etc/confluent/dock…"   4 days ago   Up 3 days              2888/tcp, 0.0.0.0:2181->2181/tcp, :::2181->2181/tcp, 3888/tcp                              zookeeper
5d259c7725ef   mysql:5.7                               "docker-entrypoint.s…"   4 days ago   Up 3 days              0.0.0.0:3306->3306/tcp, :::3306->3306/tcp, 33060/tcp                                       mysql
6c816c50e70a   elasticsearch:7.9.3                     "/tini -- /usr/local…"   4 days ago   Up 3 days (healthy)    0.0.0.0:9200->9200/tcp, :::9200->9200/tcp, 9300/tcp                                        elasticsearch
[root@HW0007740 ~]#

# 进入mysql container
docker exec -it mysql /usr/bin/mysql datahub --user=datahub --password=datahub

# 在container中进行select查询
mysql> show tables;
+--------------------+
| Tables_in_datahub  |
+--------------------+
| metadata_aspect_v2 |
| metadata_index     |
+--------------------+



