由于一个文件可能很大,因此每个拆分文件也可能很大。
例:
源文件大小:5GB
拆分数量:5
目标文件大小:每个1GB(共5个文件)
即使内存足够大,也不应该一次性把这么大的拆分块全部读入内存。做法是:对每个拆分文件,循环读取固定大小的
字节数组(byte-array),并选择一个在性能和内存占用上都可行的大小。
NumSplits:10,MaxReadBytes:8KB
public static void main(String[] args) throws Exception { RandomAccessFile raf = new RandomAccessFile("test.csv", "r"); long numSplits = 10; //from user input, extract it from args long sourceSize = raf.length(); long bytesPerSplit = sourceSize/numSplits ; long remainingBytes = sourceSize % numSplits; int maxReadBufferSize = 8 * 1024; //8KB for(int destIx=1; destIx <= numSplits; destIx++) { BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+destIx)); if(bytesPerSplit > maxReadBufferSize) { long numReads = bytesPerSplit/maxReadBufferSize; long numRemainingRead = bytesPerSplit % maxReadBufferSize; for(int i=0; i<numReads; i++) { readWrite(raf, bw, maxReadBufferSize); } if(numRemainingRead > 0) { readWrite(raf, bw, numRemainingRead); } }else { readWrite(raf, bw, bytesPerSplit); } bw.close(); } if(remainingBytes > 0) { BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+(numSplits+1))); readWrite(raf, bw, remainingBytes); bw.close(); } raf.close(); } static void readWrite(RandomAccessFile raf, BufferedOutputStream bw, long numBytes) throws IOException { byte[] buf = new byte[(int) numBytes]; int val = raf.read(buf); if(val != -1) { bw.write(buf); } }


