最近在做一个功能:用户上传文件时,需要比对文件是否重复。但文件很大时,单线程计算 MD5 会非常慢,网上也没有找到多线程的实现,于是自己写了一个。
这里有个坑:修改 `byte[] buffer = new byte[100 * 1024];` 的长度后,每次算出的 MD5 都会不一样。原因是:如果 buffer 中含有未被本次读取覆盖的数据(例如 `byte[] buffer = {0, 0, 0, 0}` 中多余的 0),而又把整个 buffer 传给 `MessageDigest.update`,这些多余的字节也会被计算进去。解决办法是只更新实际读到的字节,即 `update(buffer, 0, numRead)`;或者像 org.apache.commons.codec.digest 下 DigestUtils 的源码那样,将 size 设置为一样大小。整体思路参照多线程复制文件的做法:将文件分成几段,分别计算每段的 MD5,得到一个 MD5 列表,再对这个列表重新做一次 MD5,生成最终的 MD5 信息。参考链接:
多线程复制文件
java 计算文件MD5值 大文件
package utils;
import java.io.*;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
/**
 * Computes a composite MD5 fingerprint of a file by hashing consecutive
 * segments of the file on separate threads.
 *
 * <p>The result is NOT the standard MD5 of the file content. The file is cut
 * into equal segments (plus one tail segment when the length is not evenly
 * divisible), each segment is hashed on its own, the lowercase hex digests are
 * concatenated in segment order, and that concatenation is hashed once more.
 * The value is therefore only comparable with values produced by this class
 * using the same segment count.
 */
public class FileMd5 {

    /** Number of equal-sized segments (and worker threads) used by {@link #getMD5Message(File)}. */
    private static final int DEFAULT_THREAD_COUNT = 5;

    /** Read buffer size per worker; deliberately independent of the segment length. */
    private static final int BUFFER_SIZE = 64 * 1024;

    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Returns the composite MD5 fingerprint of {@code file}.
     *
     * @param file the file to fingerprint
     * @return 32-character lowercase hex string
     * @throws UncheckedIOException  if the file cannot be opened or read
     * @throws IllegalStateException if a worker fails for another reason or the
     *                               calling thread is interrupted while waiting
     */
    public static String getMD5Message(File file) {
        List<FutureTask<String>> tasks = startThread(DEFAULT_THREAD_COUNT, file.length(), file);
        StringBuilder combined = new StringBuilder();
        for (FutureTask<String> task : tasks) {
            // A failed segment must abort the whole computation; silently
            // skipping it would yield a plausible-looking but wrong digest.
            combined.append(await(task, file));
        }
        // A fresh digest per call keeps this method safe under concurrent use.
        byte[] hexBytes = combined.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return toHex(newMd5().digest(hexBytes));
    }

    /**
     * Demo entry point: prints the fingerprint of the file named by the first
     * argument, or of the placeholder path when no argument is given.
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "文件";
        String md5Message = FileMd5.getMD5Message(new File(path));
        System.out.println(md5Message);
    }

    /**
     * Splits {@code [0, fileLength)} into {@code threadnum} equal segments plus
     * an optional tail segment and starts one thread per segment.
     *
     * @param threadnum  number of equal-sized segments; must be positive
     * @param fileLength number of bytes of {@code file} to cover
     * @param file       the file to read
     * @return the started tasks in segment order; each yields the lowercase hex
     *         MD5 of its segment
     * @throws IllegalArgumentException if {@code threadnum} is not positive
     */
    public static List<FutureTask<String>> startThread(int threadnum, long fileLength, File file) {
        if (threadnum <= 0) {
            throw new IllegalArgumentException("threadnum must be positive: " + threadnum);
        }
        List<FutureTask<String>> futureTaskList = new ArrayList<>(threadnum + 1);
        long segmentLength = fileLength / threadnum;
        long remainder = fileLength % threadnum;
        for (int i = 0; i < threadnum; i++) {
            FileMd5Message segment =
                    new FileMd5Message(segmentLength * i, segmentLength * (i + 1), file);
            futureTaskList.add(launch(segment, "线程" + i));
        }
        if (remainder != 0) {
            // Tail segment covers exactly the bytes left over by the integer division.
            long tailBegin = segmentLength * threadnum;
            FileMd5Message tail = new FileMd5Message(tailBegin, tailBegin + remainder, file);
            futureTaskList.add(launch(tail, "最后一个线程"));
        }
        return futureTaskList;
    }

    /** Logs the segment, wraps it in a task and starts it on a new named thread. */
    private static FutureTask<String> launch(FileMd5Message segment, String threadName) {
        System.out.println(segment.toString());
        FutureTask<String> task = new FutureTask<>(segment);
        Thread worker = new Thread(task);
        worker.setName(threadName);
        worker.start();
        return task;
    }

    /** Waits for one segment digest, translating failures into unchecked exceptions. */
    private static String await(FutureTask<String> task, File file) {
        try {
            return task.get();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while computing MD5 of " + file, e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof IOException) {
                throw new UncheckedIOException("Failed to read " + file, (IOException) cause);
            }
            throw new IllegalStateException("Failed to compute MD5 of " + file, cause);
        }
    }

    /** Creates a new MD5 digest instance. */
    private static MessageDigest newMd5() {
        try {
            return MessageDigest.getInstance("MD5");
        } catch (java.security.NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }
    }

    /** Encodes bytes as lowercase hexadecimal, two characters per byte. */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(HEX_DIGITS[(b >> 4) & 0xf]).append(HEX_DIGITS[b & 0xf]);
        }
        return hex.toString();
    }

    /** Task that hashes the byte range {@code [begin, end)} of a file. */
    static class FileMd5Message implements Callable<String> {
        // Start offset (inclusive)
        private final long begin;
        // End offset (exclusive)
        private final long end;
        private final File file;

        public FileMd5Message(long begin, long end, File file) {
            this.begin = begin;
            this.end = end;
            this.file = file;
        }

        @Override
        public String toString() {
            return "FileMd5Message{" +
                    "begin=" + begin +
                    ", end=" + end +
                    ", file=" + file +
                    '}';
        }

        /**
         * Reads the segment and returns its MD5 as lowercase hex. Reading stops
         * early if the file ends before {@code end}.
         *
         * @throws IOException if the file cannot be opened or read
         */
        @Override
        public String call() throws Exception {
            MessageDigest digest = MessageDigest.getInstance("MD5");
            long remaining = Math.max(0L, end - begin);
            byte[] buffer = new byte[(int) Math.max(1L, Math.min(BUFFER_SIZE, remaining))];
            // The file is opened here, on the worker, so open failures surface
            // through the Future and the handle is always closed.
            try (RandomAccessFile input = new RandomAccessFile(file, "r")) {
                // seek() positions exactly; InputStream.skip() may skip fewer bytes.
                input.seek(begin);
                while (remaining > 0) {
                    // Cap each read at the bytes left in this segment so a short
                    // read can never be followed by a read into the next segment.
                    int numRead = input.read(buffer, 0, (int) Math.min(buffer.length, remaining));
                    if (numRead == -1) {
                        break;
                    }
                    digest.update(buffer, 0, numRead);
                    remaining -= numRead;
                }
            }
            return toHex(digest.digest());
        }
    }
}
FileMd5Util工具类
package utils;
import java.io.*;
import java.nio.ByteBuffer;
import java.security.MessageDigest;
/**
 * MD5 helpers for strings, byte arrays, files and streams, returning digests
 * as 32-character lowercase hexadecimal strings.
 *
 * <p>Every method creates its own {@link MessageDigest}, so calls do not share
 * digest state and a failed call cannot affect the result of a later one.
 */
public class FileMd5Util {
    /** Lowercase hex alphabet indexed by nibble value. */
    protected static char hexDigits[] = { '0', '1', '2', '3', '4', '5', '6',
            '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

    /**
     * Shared digest instance, retained only so existing subclasses keep
     * compiling. {@link MessageDigest} is stateful and not thread-safe, so the
     * methods of this class no longer use it.
     *
     * @deprecated create a digest per computation instead
     */
    @Deprecated
    protected static MessageDigest messagedigest = newMd5();

    /** Creates a new MD5 digest instance. */
    private static MessageDigest newMd5() {
        try {
            return MessageDigest.getInstance("MD5");
        } catch (java.security.NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }
    }

    /**
     * Returns the MD5 of the UTF-8 encoding of {@code s}.
     *
     * @param s the text to hash
     * @return lowercase hex digest
     */
    public static String getMD5String(String s) {
        // Explicit charset: the platform default would make the digest of
        // non-ASCII text depend on the machine it runs on.
        return getMD5String(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Compares two digest strings for exact (case-sensitive) equality.
     *
     * @param md5       the first digest string
     * @param md5PwdStr the second digest string
     * @return {@code true} if both are non-null and identical
     */
    public static boolean checkPassword(String md5, String md5PwdStr) {
        if (md5 == null || md5PwdStr == null) {
            return false;
        }
        if (md5.length() != md5PwdStr.length()) {
            return false;
        }
        // Accumulate differences instead of returning at the first mismatch so
        // the running time does not reveal how long the matching prefix is.
        int diff = 0;
        for (int i = 0; i < md5.length(); i++) {
            diff |= md5.charAt(i) ^ md5PwdStr.charAt(i);
        }
        return diff == 0;
    }

    /**
     * Returns the MD5 of the full content of {@code file}.
     *
     * @param file the file to hash
     * @return lowercase hex digest
     * @throws IOException if the file cannot be opened or read
     */
    public static String getFileMD5String(File file) throws IOException {
        return digestAndClose(new FileInputStream(file), 1024);
    }

    /**
     * Returns the MD5 of {@code bytes}.
     *
     * @param bytes the data to hash
     * @return lowercase hex digest
     */
    public static String getMD5String(byte[] bytes) {
        return bufferToHex(newMd5().digest(bytes));
    }

    /** Encodes all of {@code bytes} as lowercase hex. */
    static String bufferToHex(byte bytes[]) {
        return bufferToHex(bytes, 0, bytes.length);
    }

    /** Encodes {@code n} bytes of {@code bytes} starting at index {@code m} as lowercase hex. */
    private static String bufferToHex(byte bytes[], int m, int n) {
        StringBuilder hex = new StringBuilder(2 * n);
        int k = m + n;
        for (int l = m; l < k; l++) {
            appendHexPair(bytes[l], hex);
        }
        return hex.toString();
    }

    /** Appends the two hex characters for {@code bt}, high nibble first. */
    private static void appendHexPair(byte bt, StringBuilder hex) {
        // High nibble: after masking with 0xf0 the value is non-negative, so
        // >> and >>> give the same result here.
        char c0 = hexDigits[(bt & 0xf0) >> 4];
        // Low nibble.
        char c1 = hexDigits[bt & 0xf];
        hex.append(c0);
        hex.append(c1);
    }

    /**
     * Returns the MD5 of everything remaining in {@code inputStream} and closes
     * the stream, whether or not reading succeeds.
     *
     * @param inputStream the stream to consume and close
     * @return lowercase hex digest
     * @throws IOException if reading fails
     */
    public static String getMD5String(InputStream inputStream) throws IOException {
        // Reads are already done in 256 KiB chunks, so no extra buffering layer is needed.
        return digestAndClose(inputStream, 256 * 1024);
    }

    /** Hashes the stream to its end using the given chunk size, always closing it. */
    private static String digestAndClose(InputStream source, int bufferSize) throws IOException {
        MessageDigest digest = newMd5();
        try (InputStream in = source) {
            byte[] buffer = new byte[bufferSize];
            int numRead;
            while ((numRead = in.read(buffer)) > 0) {
                digest.update(buffer, 0, numRead);
            }
        }
        return bufferToHex(digest.digest());
    }
}



