1.背景
生产上数据量很大达到千万级,而开发库中只有几万条,为了测试接口性能,查询效率,故需要在开发库插入大批量数据,来测试其性能。
实现方式有多种,这里实验并对比了其中两种:
参见 https://blog.csdn.net/weixin_42740530/article/details/104043057
jdbc 直连效率最高,减少不必要的开销
一种是单条插入
java
package com.zatech.bank.message.operator.common.utils;
import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
/**
 * 方式一:单行 INSERT + JDBC 批处理。
 *
 * 原始版本的两个问题:
 *  1. 没有关闭自动提交,MySQL 默认每条语句一个事务,是耗时巨大的主因
 *     (原始测量:2860268ms);
 *  2. 11 万多条 addBatch 全部攒到最后才 executeBatch,内存占用大且
 *     无法分段提交。
 *
 * 本版本改为 try-with-resources 管理连接/语句,关闭自动提交,
 * 每 BATCH_SIZE 条 flush 一次并提交,结束时输出真实插入行数。
 */
public class BatchInsertData {

    // 连接信息(已脱敏)
    private static String url = "*******";
    private static String user = "*****";
    private static String password = "*******";

    /** 每积累多少条执行一次 executeBatch 并提交,避免批次无限增长。 */
    private static final int BATCH_SIZE = 10000;

    public static void main(String[] args) {
        // 除 request_id/register_id 外其余列均为固定值
        String sql = "INSERT INTO `test`( `request_id`,`register_id`, `template_channel`, `template_code`, `device_id`, `app`, "+
                "`jump_action`, `title`, `content`, `status`, `creator`, `modifier`, `gmt_modified`, `gmt_created`)"+
                " VALUES (?,?, '', '', 'test', 'android', 'xxxxxx', '{title}',"+
                " '{content}', '0', '', '', '2021-09-30 14:57:10', '2021-09-30 14:57:10')";
        int start = 382903;
        int end = 500000;
        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
            // try-with-resources 保证 Connection/PreparedStatement 一定被关闭
            try (Connection con = DriverManager.getConnection(url, user, password);
                 PreparedStatement ps = con.prepareStatement(sql)) {
                // 手动提交:否则每行一个事务,批处理形同虚设
                con.setAutoCommit(false);
                long time = System.currentTimeMillis();
                for (int i = start; i < end; i++) {
                    ps.setString(1, "request_id:" + i);
                    ps.setString(2, "register_id:" + i);
                    ps.addBatch();
                    // 分段落盘,控制单批大小
                    if ((i - start + 1) % BATCH_SIZE == 0) {
                        ps.executeBatch();
                        con.commit();
                    }
                }
                // 提交最后不足一批的尾部数据
                ps.executeBatch();
                con.commit();
                // 原始版本打印"20万"与实际 117097 行不符,这里输出真实行数
                System.out.println("批量入" + (end - start) + "条数据,耗费了"
                        + (System.currentTimeMillis() - time) + "ms");
                // 原始实现(自动提交 + 一次性 executeBatch)的测量:2860268ms
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
第二种. 多条拼接插入
package com.zatech.bank.message.operator.common.utils;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Date;
/**
 * 方式二:多值(multi-row)INSERT 拼接 + JDBC 批处理。
 *
 * 修复的关键 bug:原始的基础 INSERT 语句只给出了 13 个值,而列表中有
 * 14 列(缺少 `jump_action` 对应的值),与后面拼接的 14 值元组不一致,
 * 该语句在服务端会因列数/值数不匹配而报错。本版本所有行统一使用
 * 同一个 14 值元组模板,保证列值对齐。
 *
 * 同时改为 try-with-resources 管理资源,并用常量代替魔法数字。
 * 提示:Connector/J 连接串追加 rewriteBatchedStatements=true 还能进一步提速。
 */
public class BatchInsertData {

    // 连接信息(已脱敏)
    private static String url = "*****";
    private static String user = "*****";
    private static String password = "******";

    /** 在基础语句后追加的元组个数,即每条 SQL 共插入 EXTRA_ROWS+1 = 11 行。 */
    private static final int EXTRA_ROWS = 10;
    /** 每积累多少次 addBatch 执行一次 executeBatch 并提交。 */
    private static final int BATCH_SIZE = 10000;

    public static void main(String[] args) {
        // 统一的 14 值元组模板:与列清单一一对应(含 jump_action 的 'xxxxxx')
        String tuple = "(?,?, '', '', 'test', 'android', 'xxxxxx', '{title}'," +
                " '{content}', '0', '', '', '2021-09-30 14:57:10', '2021-09-30 14:57:10')";
        // 拼接出 1 + EXTRA_ROWS 个元组的多值 INSERT
        StringBuilder sql = new StringBuilder(
                "INSERT INTO `test`( `request_id`,`register_id`, `template_channel`, `template_code`, `device_id`, `app`, " +
                "`jump_action`, `title`, `content`, `status`, `creator`, `modifier`, `gmt_modified`, `gmt_created`)" +
                " VALUES ").append(tuple);
        for (int j = 0; j < EXTRA_ROWS; j++) {
            sql.append(',').append(tuple);
        }
        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
            try (Connection con = DriverManager.getConnection(url, user, password);
                 PreparedStatement ps = con.prepareStatement(sql.toString())) {
                con.setAutoCommit(false);
                System.out.println("开始批量插入数据..." + new Date());
                long time = System.currentTimeMillis();
                for (int i = 100000; i < 150000; i++) {
                    // 每行 2 个占位符,11 行共 22 个参数
                    for (int row = 0; row <= EXTRA_ROWS; row++) {
                        ps.setString(2 * row + 1, "request_id:" + i);
                        ps.setString(2 * row + 2, "register_id:" + i);
                    }
                    ps.addBatch();
                    // 分段执行并提交,控制单批大小
                    if (i % BATCH_SIZE == 0) {
                        ps.executeBatch();
                        con.commit();
                    }
                }
                long begin = System.currentTimeMillis();
                // 提交最后不足一批的尾部数据
                ps.executeBatch();
                con.commit();
                long end = System.currentTimeMillis();
                System.out.println("批量入50万数据,sql插入耗费了" + (end - begin) + "ms");
                System.out.println("批量入50万数据,总耗费了" + (System.currentTimeMillis() - time) + "ms");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
开始批量插入数据…Mon Oct 11 13:43:59 CST 2021
批量入50万数据,sql插入耗费了341865ms
批量入50万数据,总耗费了1161785ms
可以看出来,多条插入比单条插入快多了
扩展:为什么单条插入比多值(multi-row)插入慢?
普通插入SQL:INSERT INTO TBL_TEST (id) VALUES(1)
多值插入SQL:INSERT INTO TBL_TEST (id) VALUES (1), (2), (3)
使用多值插入SQL,SQL语句的总长度减少,即减少了网络IO,同时也降低了连接次数,数据库一次SQL解析,能够插入多条数据。
参考:https://www.cnblogs.com/aicro/p/3851434.html
表的存储引擎为InnoDB



