最近在使用datax同步数据表,由于有好多个表,一开始每次都需要等一个表执行完再手动执行下一个,这样效率明显很低,于是写了个Java方法来批量操作。
环境: linux服务器
/**
 * Runs every DataX job file found in a folder, one after another, and records the
 * console output of each run in a per-day log file.
 *
 * <p>The log is written to {@code <jobPath>/DataxSyncLogs/datax_sync_yyyy_MM_dd.log}
 * (appended to if it already exists). Each job is launched as
 * {@code python3 datax.py <jobFile>}; {@code datax.py} is passed as a relative path,
 * so the program has to be started from a working directory where that path resolves.
 */
public class DataxSyncUtil {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the folder that contains the DataX job files;
     *             a trailing path separator is accepted but not required
     * @throws IllegalArgumentException if no folder path is supplied or it is blank
     * @throws Exception if the log file cannot be written or a job process cannot
     *                   be started or waited for
     */
    public static void main(String[] args) throws Exception {
        // Reject a missing or blank argument up front instead of failing later
        // with an ArrayIndexOutOfBoundsException or a NullPointerException.
        if (args == null || args.length == 0 || args[0] == null || args[0].trim().isEmpty()) {
            throw new IllegalArgumentException("请输入datax脚本文件夹路径");
        }
        String jobPath = args[0];

        // Validate the job folder before anything is created underneath it.
        File jobDir = new File(jobPath);
        if (!jobDir.exists()) {
            System.out.println(jobPath + " 不存在!");
            return;
        }
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] entries = jobDir.listFiles();
        if (entries == null) {
            System.out.println(jobPath + " 不是可读取的文件夹!");
            return;
        }

        File logDir = new File(jobDir, "DataxSyncLogs");
        if (!logDir.isDirectory() && !logDir.mkdir()) {
            throw new java.io.IOException("无法创建日志文件夹: " + logDir.getPath());
        }
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy_MM_dd");
        File logFile = new File(logDir, "datax_sync_" + sdf.format(new Date()) + ".log");

        System.out.println("==========Datax Sync Job Start!==========");
        // Append mode creates the file when it is missing. try-with-resources closes
        // the stream on every exit path, and each line is written as it is produced
        // so the log survives a failure in the middle of the batch.
        try (FileOutputStream out = new FileOutputStream(logFile, true)) {
            writeLog(out, "==========================================");
            writeLog(out, "==========Datax Sync Job Start!==========");
            writeLog(out, "==========================================");

            for (File entry : entries) {
                // Sub-folders (including the log folder itself) are not jobs.
                if (!entry.isDirectory()) {
                    runJob(entry, out);
                }
            }

            System.out.println("Datax Sync Job End!");
            writeLog(out, "==========================================");
            writeLog(out, "===========Datax Sync Job End!===========");
            writeLog(out, "==========================================");
        }
    }

    /**
     * Runs one job file through {@code python3 datax.py}, echoing the process output
     * to the console and to the log, and blocks until the process exits.
     */
    private static void runJob(File jobFile, FileOutputStream out) throws Exception {
        String cmdStr = "python3 datax.py " + jobFile.getPath();
        System.out.println("start cmd: " + cmdStr);
        writeLog(out, "start cmd: " + cmdStr);

        // Passing the arguments separately keeps a path that contains spaces intact.
        ProcessBuilder builder = new ProcessBuilder("python3", "datax.py", jobFile.getPath());
        // Merge stderr into stdout so error output is logged too and the child
        // cannot block on a full, unread stderr pipe.
        builder.redirectErrorStream(true);
        Process pr = builder.start();

        try (BufferedReader in = new BufferedReader(new InputStreamReader(pr.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
                writeLog(out, line);
            }
        }

        int exitCode = pr.waitFor();
        if (exitCode != 0) {
            // Surface failed jobs; the remaining jobs still run.
            String failure = "cmd exit code " + exitCode + ": " + cmdStr;
            System.out.println(failure);
            writeLog(out, failure);
        }
    }

    /** Appends one line of text, terminated by a newline, to the log as UTF-8. */
    private static void writeLog(FileOutputStream out, String text) throws java.io.IOException {
        out.write((text + "\n").getBytes("utf-8"));
    }
}
【注意】看一下注释的说明,运行时将存放job脚本的文件夹路径(以"/"结尾)作为main函数的第一个参数传入。



