第一种方法:
1、启动需要调试项目jar命令:(参考原始命令)
./bin/spark-submit --master yarn --deploy-mode cluster \
--num-executors 8 \
--executor-cores 4 \
--executor-memory 12G \
--driver-java-options "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=18888" \
--class com.proj.user_behavior.UserBehaviorCleaner UserBehaviorCleaner.jar \
hdfs://linux102:9000/user/hive/warehouse/ods.db/origin_user_behavior/${day} \
hdfs://linux102:9000/user/hive/warehouse/tmp.db/user_behavior_${day}
(测试通过命令)
./bin/spark-submit --driver-java-options "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=18888" --class com.proj.user_behavior.UserBehaviorCleaner UserBehaviorCleaner.jar hdfs://linux102:9000/user/hive/warehouse/ods.db/origin_user_behavior/20220101 hdfs://linux102:9000/user/hive/warehouse/tmp.db/user_behavior_20220101
2、之后,程序会阻塞(suspend=y),等待调试器连接 18888 端口;接着在 IDEA 中新建 Remote JVM Debug 配置(Host 填 Driver 所在机器,Port 填 18888),以 Debug 方式启动即可。
第二种方法:
1、直接运行spark-shell命令
./bin/spark-shell --jars ./UserBehaviorCleaner.jar
2、之后把项目中的代码直接扔进去执行即可
scala> def repairUsername(event: String) = {
| val fields = event.split("\t")
| // 取出用户昵称
| val username = fields(1)
| // 用户昵称不为空时去掉其中的"\n"
| if (username != null && !"".equals(username)) {
| fields(1) = username.replace("\n","")
| }
| fields.mkString("\t")
| }
repairUsername: (event: String) String



