引入jar
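<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>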
工具类:
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
public class PdfUtils {
public static List extractTXTbyLine(String file) {
List listArr = new ArrayList();
try {
PdfReader reader = new PdfReader(file);
int pageNum = reader.getNumberOfPages(); // 获得页数
for (int i = 1; i <= pageNum; i++) { // 只能从第1页开始读
String textFromPageContent = PdfTextExtractor.getTextFromPage(reader, i);
String[] splitArray = textFromPageContent.split("n");
if (splitArray.length > 0) {
listArr.addAll(Arrays.asList(splitArray));
}
}
} catch (IOException ex) {
Logger.getLogger(PdfUtils.class.getName()).log(Level.SEVERE, null, ex);
}
return listArr;
}
public static void main(String args[]) {
String file = "F:\文档\test.pdf";
long startTime = System.currentTimeMillis();
List strings = extractTXTbyLine(file);
for (String s : strings) {
System.out.println(s);
}
long endTime = System.currentTimeMillis();
System.out.println("读写所用时间为:" + (endTime - startTime) + "ms");
}
}



