- 下载 Dinky 1.2.1 版本到 EC2,解压,本文假设 Dinky 安装目录在 /opt/dinky/(下方命令对应 1.2.1 版本)
cd /opt/
# Release file name pattern: dinky-release-<flink-version>-<dinky-version>.tar.gz
sudo wget https://github.com/DataLinkDC/dinky/releases/download/v1.2.1/dinky-release-1.17-1.2.1.tar.gz
sudo tar -xvf dinky-release-1.17-1.2.1.tar.gz
sudo mv dinky-release-1.17-1.2.1 dinky
# Clean up the downloaded archive (the original line was a bare file name,
# which would fail if pasted into a shell).
sudo rm dinky-release-1.17-1.2.1.tar.gz
注意版本对应关系:安装包名为 dinky-release-&lt;Flink 版本&gt;-&lt;Dinky 版本&gt;.tar.gz,例如 dinky-release-1.17-1.2.1.tar.gz 表示支持 Flink 1.17 的 Dinky 1.2.1 版本。
- 安装JDK并配置JAVA_HOME 环境变量
- 复制 EMR 配置文件到 Dinky 服务器
sudo mkdir -p /etc/flink/conf
# Authenticate with the EC2 key pair, consistent with the other scp commands
# in this guide (password login is usually disabled on EMR nodes).
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/etc/alternatives/flink-conf/* /etc/flink/conf/
sudo mkdir -p /etc/hadoop/conf
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/etc/alternatives/hadoop-conf/* /etc/hadoop/conf/
- 准备 mysql jdbc 驱动 jar 包
sudo wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar
sudo mv mysql-connector-java-8.0.28.jar /opt/dinky/lib/
# The driver jar only needs to be world-readable; 777 (world-writable) is an
# unnecessary security risk.
sudo chmod 644 /opt/dinky/lib/mysql-connector-java-8.0.28.jar
- Dinky 需要具备内置的 Flink 环境,将 EMR Flink 相关环境 jar 包复制到 Dinky 服务器
# Ensure the target directories exist before copying (idempotent).
sudo mkdir -p /opt/dinky/extends/flink1.17/ /opt/dinky/customJar/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/flink/lib/* /opt/dinky/extends/flink1.17/
# flink-table-planner-loader conflicts with flink-table-planner; keep only the
# full planner jar copied below.
sudo rm /opt/dinky/extends/flink1.17/flink-table-planner-loader-1.17.1-amzn-1.jar
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/flink/opt/flink-table-planner_2.12-1.17.1-amzn-1.jar /opt/dinky/extends/flink1.17/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/hadoop/*.jar /opt/dinky/customJar/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/hadoop/client/*.jar /opt/dinky/customJar/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/share/aws/emr/emrfs/lib/*.jar /opt/dinky/customJar/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/flink/plugins/s3/*.jar /opt/dinky/customJar/
- Dinky 任务在提交到 EMR 后,需要在 HDFS 中寻找依赖 jar 包,所以需要将 dinky 的部分 jar 包上传到 EMR HDFS。
#以下命令在Dinky服务器执行
# Create the target directory on the EMR master before copying.
ssh -i /home/ec2-user/key.pem hadoop@<EMR Master IP> "mkdir -p /home/hadoop/dinky/"
# NOTE: the app jar version must match the installed Dinky release (1.2.1 was
# downloaded above; the original text referenced 1.0.3).
sudo scp -r -i /home/ec2-user/key.pem /opt/dinky/jar/dinky-app-1.17-1.2.1-jar-with-dependencies.jar hadoop@<EMR Master IP>:/home/hadoop/dinky/
sudo scp -r -i /home/ec2-user/key.pem /opt/dinky/lib/mysql-connector-java-8.0.28.jar hadoop@<EMR Master IP>:/home/hadoop/dinky/
#以下命令在EMR Master节点执行
# -p: create parent directories and succeed even if the path already exists,
# so these commands are safe to rerun.
hdfs dfs -mkdir -p /user/hadoop/dinky/
hdfs dfs -mkdir -p /user/hadoop/flink/lib/
# The app jar version must match the installed Dinky release (1.2.1, not 1.0.3).
hdfs dfs -put /home/hadoop/dinky/dinky-app-1.17-1.2.1-jar-with-dependencies.jar /user/hadoop/dinky/
hdfs dfs -put /home/hadoop/dinky/mysql-connector-java-8.0.28.jar /user/hadoop/flink/lib/
hdfs dfs -put /usr/lib/flink/lib/* /user/hadoop/flink/lib/
hdfs dfs -put /usr/lib/flink/plugins/s3/* /user/hadoop/flink/lib/
# Remove the planner-loader jar, which conflicts with flink-table-planner.
hdfs dfs -rm /user/hadoop/flink/lib/flink-table-planner-loader-1.17.1-amzn-1.jar
hdfs dfs -put /usr/lib/flink/opt/flink-table-planner_2.12-1.17.1-amzn-1.jar /user/hadoop/flink/lib/
- 启动 Dinky
# Start the Dinky service. auto.sh resolves paths relative to the install
# directory, so cd there first — TODO confirm it cannot be run from elsewhere.
cd /opt/dinky/
sudo bash auto.sh start
使用默认用户名/密码:admin/dinky123!@# ,在浏览器访问 http://&lt;Dinky 服务器 IP&gt;:8888 即可登录 Dinky 界面
- 配置dinky flink cluster
在 Dinky 控制台点击注册中心,进入后点击左侧集群-集群配置,进入集群配置列表界面,然后点击新建,创建一个集群。
-- Source table: produces an unbounded stream of random rows (datagen connector).
CREATE TABLE datagen_source (
    id   BIGINT,
    name STRING
) WITH (
    'connector' = 'datagen'
);

-- Sink table: silently discards every row it receives (blackhole connector).
CREATE TABLE blackhole_sink (
    id   BIGINT,
    name STRING
) WITH (
    'connector' = 'blackhole'
);

-- Pipe the generated rows straight into the sink.
INSERT INTO blackhole_sink
SELECT id, name
FROM datagen_source;
选择 Per-Job 模式,提交测试任务
参考: https://aws.amazon.com/cn/blogs/china/building-an-emr-data-analysis-platform-based-on-open-source-tools-part-two/
https://www.dinky.org.cn/docs/1.1/get_started/overview