栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Java

TrWebOcr API-JAVA版本

Java 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

TrWebOcr API-JAVA版本

TrWebOCR-开源的离线OCR 介绍

TrWebOCR,基于开源项目 Tr 构建。
在其基础上提供了http调用的接口,便于你在其他的项目中调用。
并且提供了易于使用的web页面,便于调试或日常使用。

特性
  • 中文识别
    快速高识别率
  • 文字检测
    支持一定角度的旋转
  • 并发请求
    模型本身不支持并发,但通过 tornado 多进程的方式,可以支持一定数量的并发请求。具体并发数取决于机器的配置。
安装说明 使用docker-compose.yml部署
# Compose file that runs the TrWebOCR image as a single service.
version: '3.5'

services:
  trwebocrsrv:
    image: mmmz/trwebocr:latest
    container_name: trwebocrsrv
    # Publish the container's port 8089 on the same host port.
    ports:
      - "8089:8089"
    # Bind-mount the container's /opt/logs directory to ./data/logs on the host.
    volumes:
      - ./data/logs:/opt/logs
    networks:
      trwebocr:
        aliases:
          - trwebocrsrv

networks:
  trwebocr:
    name: trwebocr
    driver: bridge

访问:http://127.0.0.1:8089 即可

Java访问【基于spring-boot】
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.TypeReference;
import com.xxxxx.common.msg.ResponseData;
import com.xxxxx.common.utils.JsonUtils;
import com.xxxxx.web.module.form.Ocrbase64Form;
import com.xxxxx.web.module.vo.OrcbaseVo;
import com.xxxxx.web.module.vo.OrcDataVo;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringEscapeUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.validation.BindingResult;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;

import javax.servlet.http.HttpServletRequest;
import javax.validation.Valid;
import java.io.File;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.List;


@Slf4j
@Api(tags = "OCR文字识别")
@RestController
public class OcrWordController {

    private static final String OCR_WORD = "http://127.0.0.1:8089/api/tr-run/";

    @Autowired
    private RestTemplate restTemplate;

    @ApiOperation(value="OCR文字识别文件型", notes="nOCR文字识别文件型")
    @PostMapping(value = "/orc/multipart-file")
    public ResponseData getbaseFile(HttpServletRequest request){
        MultipartHttpServletRequest multipartHttpServletRequest = (MultipartHttpServletRequest)request;
        MultipartFile file = multipartHttpServletRequest.getFile("file");
        HttpHeaders headers = new HttpHeaders();
        MediaType type = MediaType.parseMediaType("multipart/form-data");
        headers.setContentType(type);
        Resource resource = file.getResource();
        MultiValueMap mapFile = new linkedMultiValueMap<>();
//        FileSystemResource resource = new FileSystemResource(resource1);
        mapFile.add("file", resource);
        HttpEntity> httpEntity = new HttpEntity>(mapFile,headers);
        ResponseEntity postForEntity = restTemplate.postForEntity(OCR_WORD, httpEntity, String.class);
        String body = postForEntity.getBody();
        String s = StringEscapeUtils.unescapeJava(body);
        OrcbaseVo orcbaseVo = JsonUtils.parseObject(s, new TypeReference>() {
        });
        if (orcbaseVo != null){
            OrcDataVo orcDataVo = orcbaseVo.getData();
            List raw_out = orcDataVo.getRaw_out();
            BigDecimal nextLineHeight;
            StringBuffer ocrText = new StringBuffer();
            int size = raw_out.size();
            for (int i = 0; i < size; i++) {
                String ocrRaw = JsonUtils.toJSONString(raw_out.get(i));
                JSONArray array = JSONArray.parseArray(ocrRaw);
                JSONArray arrayChildren = JSONArray.parseArray(JsonUtils.toJSONString(array.get(0)));
                ocrText.append(array.get(1));
                //            // 合并同一行的数据
                if (i < size - 1){
                    JSONArray arrayNext = JSONArray.parseArray(JsonUtils.toJSONString(raw_out.get(i + 1)));
                    JSONArray arrayNextChildren = JSONArray.parseArray(JsonUtils.toJSONString(arrayNext.get(0)));
                    nextLineHeight = (BigDecimal) arrayNextChildren.get(1);
                    // 判断判断同一行的依据是 两段的行高差 小于 行高的一半
                    BigDecimal  arrayChildrenBig0 =  (BigDecimal)arrayChildren.get(1);
                    BigDecimal arrayChildrenBig2 = (BigDecimal) arrayChildren.get(3);
                    if (Math.abs(arrayChildrenBig0.subtract(nextLineHeight).doublevalue()) < arrayChildrenBig2.divide(new BigDecimal("2")).doublevalue()){
                        ocrText.append(" ");
                    }else {
                        ocrText.append("r");
                    }
                }

            }
            log.info("ocrText = {}", ocrText);
            orcDataVo.setData(ocrText.toString());
            return ResponseData.newInstanceOfSuccess(orcDataVo);
        }
        return ResponseData.newInstanceOfDefaultError();
    }

    @ApiOperation(value="OCR文字识别base64Form", notes="nOCR文字识别base64Form")
    @PostMapping(value = "/orc/multipart-base64")
    public ResponseData getbasebase64(@Valid @RequestBody Ocrbase64Form form, BindingResult result){
        if (result.hasErrors()){
            return ResponseData.newInstanceOfInvalid(result);
        }
        HttpHeaders headers = new HttpHeaders();
        MultiValueMap params = new linkedMultiValueMap<>();
        params.add("img", form.getImg());
        HttpEntity> httpEntity = new HttpEntity<>(params, headers);

        ResponseEntity postForEntity = restTemplate.postForEntity(OCR_WORD, httpEntity, String.class);
        String body = postForEntity.getBody();
        String s = StringEscapeUtils.unescapeJava(body);
        OrcbaseVo orcbaseVo = JsonUtils.parseObject(s, new TypeReference>() {
        });
        if (orcbaseVo != null){
            OrcDataVo orcDataVo = orcbaseVo.getData();
            List raw_out = orcDataVo.getRaw_out();
            BigDecimal nextLineHeight;
            String ocrRaw = "";
            StringBuffer ocrText = new StringBuffer();
            int size = raw_out.size();
            for (int i = 0; i < size; i++) {
                ocrRaw = JsonUtils.toJSONString(raw_out.get(i));
                JSONArray array = JSONArray.parseArray(ocrRaw);
                JSONArray arrayChildren = JSONArray.parseArray(JsonUtils.toJSONString(array.get(0)));
                ocrText.append(array.get(1));
                //            // 合并同一行的数据
                if (i < size - 1){
                    JSONArray arrayNext = JSONArray.parseArray(JsonUtils.toJSONString(raw_out.get(i + 1)));
                    JSONArray arrayNextChildren = JSONArray.parseArray(JsonUtils.toJSONString(arrayNext.get(0)));
                    nextLineHeight = (BigDecimal) arrayNextChildren.get(1);
                    // 判断判断同一行的依据是 两段的行高差 小于 行高的一半
                    BigDecimal  arrayChildrenBig0 =  (BigDecimal)arrayChildren.get(1);
                    BigDecimal arrayChildrenBig2 = (BigDecimal) arrayChildren.get(3);
                    if (Math.abs(arrayChildrenBig0.subtract(nextLineHeight).doublevalue()) < arrayChildrenBig2.divide(new BigDecimal("2")).doublevalue()){
                        ocrText.append(" ");
                    }else {
                        ocrText.append("r");
                    }
                }

            }
            log.info("ocrText = {}", ocrText);
            orcDataVo.setData(ocrText.toString());
            return ResponseData.newInstanceOfSuccess(orcDataVo);
        }
        return ResponseData.newInstanceOfDefaultError();
    }

}

 
请求参数代码 
package com.xxx.web.module.form;
import lombok.Data;


/**
 * Request body carrying the image to run OCR on.
 */
@Data
public class Ocrbase64Form {

    // Image payload; presumably base64-encoded, judging by the class name — confirm with the caller.
    private String img;
}

返回参数代码
package com.xxxx.web.module.vo;

import lombok.Data;
import lombok.ToString;


/**
 * Generic response envelope: a numeric code, a message and a typed payload.
 *
 * @param <T> type of the {@code data} payload
 */
@Data
@ToString
public class OrcbaseVo<T> {

    /** Numeric code of the response. */
    private Integer code;

    /** Message of the response. */
    private String msg;

    /** Payload of the response. */
    private T data;

}

package com.xxxx.web.module.vo;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import lombok.ToString;

import java.math.BigDecimal;
import java.util.List;


/**
 * OCR result payload: the annotated image, the raw recognition entries, the recognition time
 * and the merged text.
 *
 * <p>Field names use snake_case, presumably to match the JSON keys of the OCR reply.
 */
@Data
@ToString
public class OrcDataVo {

    // Base64 image.
    @ApiModelProperty("识别文字后base64图片")
    private String img_detected;

    // Raw output; elements are kept as Object because their structure is not declared here.
    @ApiModelProperty("输出坐标、识别文字位置")
    private List<Object> raw_out;

    // Speed.
    @ApiModelProperty("识别速度")
    private BigDecimal speed_time;

    // Recognition output.
    @ApiModelProperty("识别整理后输出")
    private String data;
}




转载请注明:文章转载自 www.mshxw.com
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号