Tesseract-OCR下载地址:https://github.com/UB-Mannheim/tesseract/wiki,下载tesseract安装包
安装tesseract,双击进行运行安装。全部点next就行。自己选择安装的位置
配置tesseract环境变量:系统环境变量 –> Path,添加Tesseract-OCR的安装路径。
系统变量–>新建
变量名:TESSDATA_PREFIX
变量值:D:\java工具\Tesseract-ocr\tessdata(即你的Tesseract安装位置下的tessdata目录,我用的是默认的安装位置)
这一步是添加Tesseract-OCR的语言库,语言库地址:https://github.com/tesseract-ocr/tessdata。你还可以根据实际情况去自己训练自己的语言
Tesseract-OCR与java代码结合:先截屏,再识别截图中的文字信息。如果不需要截屏,可以直接换成已有图片的地址。
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
public class xxx {
public static void main(String[] args) {
try {
jp();
//图片文件:此图片是需要被识别的图片
File file = new File("D:\java工具\Tesseract-ocr\tessdata\图片\test.png");
System.out.println("begin.........");
String recognizeText1 = new test01().recognizeText(file);
System.out.print(recognizeText1+"t");
} catch (IOException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
}
//截屏
public static void jp(){
Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
// 创建需要截取的矩形区域
Rectangle rect = new Rectangle(0, 0, screenSize.width, screenSize.height);
// 截屏操作
BufferedImage bufImage = null;
try {
bufImage = new Robot().createScreenCapture(rect); //存入截图
if (ImageIO.write(bufImage, "PNG", new File("D:\java工具\Tesseract-ocr\tessdata\图片\test.png"))) {
System.out.println("截屏成功");
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
调用下面test01类的recognizeText方法,读取图片中的文字数据
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
/**
 * Runs the Tesseract command-line executable on an image file and returns the
 * recognized text.
 */
public class test01 {
    /** Command-line flag that precedes the recognition language. */
    private final String LANG_OPTION = "-l";
    /** Platform line separator, appended after each line that is read back. */
    private final String EOL = System.getProperty("line.separator");
    /** Tesseract installation directory; also the child process's working directory. */
    private String tessPath = "D://java工具//Tesseract-ocr";

    /**
     * Recognizes the text in {@code imageFile}.
     *
     * <p>The executable is asked to write its result to
     * {@code <image dir>/output/output.txt}; that file is read as UTF-8 and
     * deleted afterwards. The {@code output} directory is created when missing.
     * Start time, end time and elapsed milliseconds are printed to standard
     * output.
     *
     * @param imageFile image to recognize; must not be {@code null}
     * @return the recognized text with ALL whitespace removed (including the
     *         spaces between words and the line breaks)
     * @throws NullPointerException if {@code imageFile} is {@code null}
     * @throws RuntimeException if the process exits with a non-zero code
     * @throws Exception if the process cannot be started, the wait is
     *         interrupted, or the result file cannot be read
     */
    public String recognizeText(File imageFile) throws Exception {
        if (imageFile == null) {
            throw new NullPointerException("imageFile must not be null");
        }
        // The child runs with tessPath as its working directory, so a relative
        // image path would be resolved against the wrong directory there.
        File image = imageFile.getAbsoluteFile();
        File outputDir = new File(image.getParentFile(), "output");
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new java.io.IOException("Cannot create output directory " + outputDir);
        }
        // Tesseract appends ".txt" to the output base name by itself.
        File outputBase = new File(outputDir, outputDir.getName());
        File resultFile = new File(outputBase.getPath() + ".txt");

        ProcessBuilder pb = new ProcessBuilder(buildCommand(image, outputBase));
        pb.directory(new File(tessPath));
        pb.redirectErrorStream(true);

        long startTime = System.currentTimeMillis();
        System.out.println("开始时间:" + startTime);
        Process process = pb.start();
        // Drain the merged stdout/stderr BEFORE waiting: if nobody reads the
        // pipe, the child can block once the pipe buffer fills up and
        // waitFor() would then never return.
        String processOutput = drain(process);
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            String detail = processOutput.trim();
            throw new RuntimeException(detail.isEmpty()
                    ? describeExitCode(exitCode)
                    : describeExitCode(exitCode) + " Tesseract output: " + detail);
        }

        StringBuilder text = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(resultFile), "UTF-8"))) {
            String line;
            while ((line = in.readLine()) != null) {
                text.append(line).append(EOL);
            }
        } finally {
            // Remove the generated file even when reading fails; the reader is
            // already closed at this point, so the delete is not blocked by it.
            resultFile.delete();
        }
        long endTime = System.currentTimeMillis();
        System.out.println("结束时间:" + endTime);
        System.out.println("耗时:" + (endTime - startTime) + "毫秒");

        return text.toString().replaceAll("\\s+", "");
    }

    /** Builds the argument list: executable, input image, output base, language option. */
    private List<String> buildCommand(File image, File outputBase) {
        List<String> cmd = new ArrayList<String>();
        cmd.add(new File(tessPath, "tesseract").getPath());
        cmd.add(image.getPath());
        cmd.add(outputBase.getPath());
        cmd.add(LANG_OPTION);
        // Replace "eng" with "chi_sim" (or a custom-trained language) to
        // switch the recognition language.
        cmd.add("eng");
        return cmd;
    }

    /** Reads everything the process prints (stderr is merged into stdout) until it closes the stream. */
    private String drain(Process process) throws java.io.IOException {
        StringBuilder out = new StringBuilder();
        // Console output of a native tool: decoded with the platform default charset.
        try (BufferedReader console = new BufferedReader(
                new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = console.readLine()) != null) {
                out.append(line).append(EOL);
            }
        }
        return out.toString();
    }

    /** Maps a non-zero exit code to the error message used for the thrown exception. */
    private String describeExitCode(int exitCode) {
        switch (exitCode) {
            case 1:
                return "Errors accessing files. There may be spaces in your image's filename.";
            case 29:
                return "Cannot recognize the image or its selected region.";
            case 31:
                return "Unsupported image format.";
            default:
                return "Errors occurred.";
        }
    }
}
结果:



