栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Java

Java多线程实现快速切分文件的程序

Java 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

Java多线程实现快速切分文件的程序

前段时间需要进行大批量数据导入,DBA给提供的是CVS文件,但是每个CVS文件都好几个GB大小,直接进行load,数据库很慢还会产生内存不足的问题,为了实现这个功能,写了个快速切分文件的程序。

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
 
import java.io.*;
import java.util.*;
import java.util.concurrent.*;
 
public class FileSplitUtil {
 
  private final static Logger log = LogManager.getLogger(FileSplitUtil.class);
  private static final long originFileSize = 1024 * 1024 * 100;// 100M
  private static final int blockFileSize = 1024 * 1024 * 64;// 防止中文乱码,必须取2的N次方
  
  private static final char cvsSeparator = '^';
  public static void main(String args[]){
    long start = System.currentTimeMillis();
    try {
      String fileName = "D:\csvtest\aa.csv";
      File sourceFile = new File(fileName);
      if (sourceFile.length() >= originFileSize) {
 String cvsFileName = fileName.replaceAll("\\", "/");
 FileSplitUtil fileSplitUtil = new FileSplitUtil();
 List parts=fileSplitUtil.splitBySize(cvsFileName, blockFileSize);
 for(String part:parts){
   System.out.println("partName is:"+part);
 }
      }
      System.out.println("总文件长度"+sourceFile.length()+",拆分文件耗时:" + (System.currentTimeMillis() - start) + "ms.");
    }catch (Exception e){
      log.info(e.getStackTrace());
    }
 
  }
 
 
 
  
  public List splitBySize(String fileName, int byteSize)
      throws IOException, InterruptedException {
    List parts = new ArrayList();
    File file = new File(fileName);
    int count = (int) Math.ceil(file.length() / (double) byteSize);
    int countLen = (count + "").length();
    RandomAccessFile raf = new RandomAccessFile(fileName, "r");
    long totalLen = raf.length();
    CountDownLatch latch = new CountDownLatch(count);
 
    for (int i = 0; i < count; i++) {
      String partFileName = file.getPath() + "."
   + leftPad((i + 1) + "", countLen, '0') + ".cvs";
      int readSize=byteSize;
      long startPos=(long)i * byteSize;
      long nextPos=(long)(i+1) * byteSize;
      if(nextPos>totalLen){
 readSize= (int) (totalLen-startPos);
      }
      new SplitRunnable(readSize, startPos, partFileName, file, latch).run();
      parts.add(partFileName);
    }
    latch.await();//等待所有文件写完
    //由于切割时可能会导致行被切断,加工所有的的分割文件,合并行
    mergeRow(parts);
    return parts;
  }
 
  
  private class SplitRunnable implements Runnable {
    int byteSize;
    String partFileName;
    File originFile;
    long startPos;
    CountDownLatch latch;
    public SplitRunnable(int byteSize, long startPos, String partFileName,
 File originFile, CountDownLatch latch) {
      this.startPos = startPos;
      this.byteSize = byteSize;
      this.partFileName = partFileName;
      this.originFile = originFile;
      this.latch = latch;
    }
 
    public void run() {
      RandomAccessFile rFile;
      OutputStream os;
      try {
 rFile = new RandomAccessFile(originFile, "r");
 byte[] b = new byte[byteSize];
 rFile.seek(startPos);// 移动指针到每“段”开头
 int s = rFile.read(b);
 os = new FileOutputStream(partFileName);
 os.write(b, 0, s);
 os.flush();
 os.close();
 latch.countDown();
      } catch (IOException e) {
 log.error(e.getMessage());
 latch.countDown();
      }
    }
  }
 
  
  private void mergeRow(List parts) {
    List partFiles = new ArrayList();
    try {
      //组装被切分表对象
      for (int i=0;i=1){
     String prePartFile=parts.get(i - 1);
     String preEndRow = readLastLine(prePartFile);
     partFile.setFirstIsFull(getCharCount(firstRow+preEndRow)>getCharCount(secondRow));
   }
 
   partFiles.add(partFile);
   reader.close();
 }
      }
      //进行需要合并的行的写入
      for (int i = 0; i < partFiles.size() - 1; i++) {
 PartFile partFile = partFiles.get(i);
 PartFile partFileNext = partFiles.get(i + 1);
 StringBuilder sb = new StringBuilder();
 if (partFileNext.getFirstIsFull()) {
   sb.append("rn");
   sb.append(partFileNext.getFirstRow());
 } else {
   sb.append(partFileNext.getFirstRow());
 }
 writeLastLine(partFile.getPartFileName(),sb.toString());
      }
    } catch (Exception e) {
      log.error(e.getMessage());
    }
  }
 
  
  private int getCharCount(String s) {
    int count = 0;
    for (int i = 0; i < s.length(); i++) {
      if (s.charAt(i) == cvsSeparator) {
 count++;
      }
    }
    return count;
  }
 
  
  public int getFileRow(String filename) throws IOException {
    InputStream is = new BufferedInputStream(new FileInputStream(filename));
    byte[] c = new byte[1024];
    int count = 0;
    int readChars = 0;
    while ((readChars = is.read(c)) != -1) {
      for (int i = 0; i < readChars; ++i) {
 if (c[i] == 'n')
   ++count;
      }
    }
    is.close();
    return count;
  }
 
  
  private String readLastLine(String filename) throws IOException {
    // 使用RandomAccessFile , 从后找最后一行数据
    RandomAccessFile raf = new RandomAccessFile(filename, "r");
    long len = raf.length();
    String lastLine = "";
    if(len!=0L) {
      long pos = len - 1;
      while (pos > 0) {
 pos--;
 raf.seek(pos);
 if (raf.readByte() == 'n') {
   lastLine = raf.readLine();
   lastLine=new String(lastLine.getBytes("8859_1"), "gbk");
   break;
 }
      }
    }
    raf.close();
    return lastLine;
  }
  
  private void writeLastLine(String fileName,String lastString){
    try {
      // 打开一个随机访问文件流,按读写方式
      RandomAccessFile randomFile = new RandomAccessFile(fileName, "rw");
      // 文件长度,字节数
      long fileLength = randomFile.length();
      //将写文件指针移到文件尾。
      randomFile.seek(fileLength);
      //此处必须加gbk,否则会出现写入乱码
      randomFile.write(lastString.getBytes("gbk"));
      randomFile.close();
    } catch (IOException e) {
      log.error(e.getMessage());
    }
  }
  
  public static String leftPad(String str, int length, char ch) {
    if (str.length() >= length) {
      return str;
    }
    char[] chs = new char[length];
    Arrays.fill(chs, ch);
    char[] src = str.toCharArray();
    System.arraycopy(src, 0, chs, length - src.length, src.length);
    return new String(chs);
  }
 
  
  class PartFile {
    private String partFileName;
    private String firstRow;
    private String endRow;
    private boolean firstIsFull;
 
    public String getPartFileName() {
      return partFileName;
    }
 
    public void setPartFileName(String partFileName) {
      this.partFileName = partFileName;
    }
 
    public String getFirstRow() {
      return firstRow;
    }
 
    public void setFirstRow(String firstRow) {
      this.firstRow = firstRow;
    }
 
    public String getEndRow() {
      return endRow;
    }
 
    public void setEndRow(String endRow) {
      this.endRow = endRow;
    }
 
    public boolean getFirstIsFull() {
      return firstIsFull;
    }
 
    public void setFirstIsFull(boolean firstIsFull) {
      this.firstIsFull = firstIsFull;
    }
  }
 
}

以上就是本文的全部内容,希望对大家学习java程序设计有所帮助。

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/149889.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号