add 新增谷歌开发并开源的图像文字识别引擎
This commit is contained in:
parent
eda67dd572
commit
7cdc3cfeb2
10
pom.xml
10
pom.xml
@ -40,6 +40,9 @@
|
|||||||
<!-- 离线IP地址定位库 -->
|
<!-- 离线IP地址定位库 -->
|
||||||
<ip2region.version>2.7.0</ip2region.version>
|
<ip2region.version>2.7.0</ip2region.version>
|
||||||
|
|
||||||
|
<!-- 谷歌开发并开源的图像文字识别引擎 -->
|
||||||
|
<tesseract.version>5.13.0</tesseract.version>
|
||||||
|
|
||||||
<!-- OSS 配置 -->
|
<!-- OSS 配置 -->
|
||||||
<aws.sdk.version>2.28.22</aws.sdk.version>
|
<aws.sdk.version>2.28.22</aws.sdk.version>
|
||||||
<aws.crt.version>0.31.3</aws.crt.version>
|
<aws.crt.version>0.31.3</aws.crt.version>
|
||||||
@ -316,6 +319,13 @@
|
|||||||
<version>${ip2region.version}</version>
|
<version>${ip2region.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- 谷歌开发并开源的图像文字识别引擎 -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>net.sourceforge.tess4j</groupId>
|
||||||
|
<artifactId>tess4j</artifactId>
|
||||||
|
<version>${tesseract.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-io</groupId>
|
<groupId>commons-io</groupId>
|
||||||
<artifactId>commons-io</artifactId>
|
<artifactId>commons-io</artifactId>
|
||||||
|
BIN
ruoyi-admin/src/main/resources/traineddata/chi_sim.traineddata
Normal file
BIN
ruoyi-admin/src/main/resources/traineddata/chi_sim.traineddata
Normal file
Binary file not shown.
@ -94,6 +94,12 @@
|
|||||||
<artifactId>ip2region</artifactId>
|
<artifactId>ip2region</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- 谷歌开发并开源的图像文字识别引擎 -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>net.sourceforge.tess4j</groupId>
|
||||||
|
<artifactId>tess4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
@ -0,0 +1,76 @@
|
|||||||
|
package org.dromara.common.core.utils.ocr;
|
||||||
|
|
||||||
|
import cn.hutool.core.io.FileUtil;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import net.sourceforge.tess4j.ITesseract;
|
||||||
|
import net.sourceforge.tess4j.Tesseract;
|
||||||
|
import net.sourceforge.tess4j.TesseractException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tesseract OCR 工具类,提供OCR识别功能
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
public class TesseractUtils {
|
||||||
|
private static final String DEFAULT_LANGUAGE = "chi_sim";
|
||||||
|
private static final String DATAPATH = FileUtil.getTmpDir() + FileUtil.FILE_SEPARATOR + "traineddata";
|
||||||
|
|
||||||
|
private static final ThreadLocal<ITesseract> THREAD_LOCAL_TESSERACT = ThreadLocal.withInitial(() -> {
|
||||||
|
ITesseract tesseract = new Tesseract();
|
||||||
|
tesseract.setDatapath(DATAPATH); // 设置训练数据路径
|
||||||
|
tesseract.setLanguage(DEFAULT_LANGUAGE); // 设置默认语言
|
||||||
|
return tesseract;
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 设置当前线程的语言
|
||||||
|
*
|
||||||
|
* @param language 语言代码,如 "chi_sim"(简体中文)、"eng"(英语)
|
||||||
|
*/
|
||||||
|
public static void setLanguage(String language) {
|
||||||
|
ITesseract tesseract = THREAD_LOCAL_TESSERACT.get();
|
||||||
|
tesseract.setLanguage(language);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 执行 OCR 识别操作,使用默认语言(如 "chi_sim")
|
||||||
|
*
|
||||||
|
* @param file 需要识别的图片文件
|
||||||
|
* @return 识别结果,返回图片中的文字内容
|
||||||
|
* @throws TesseractException 如果 OCR 识别过程中发生异常,抛出此异常
|
||||||
|
*/
|
||||||
|
public static String performOCRWithDefaultLanguage(File file) throws TesseractException {
|
||||||
|
ITesseract tesseract = THREAD_LOCAL_TESSERACT.get();
|
||||||
|
tesseract.setLanguage(DEFAULT_LANGUAGE);
|
||||||
|
return tesseract.doOCR(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 执行 OCR 识别操作
|
||||||
|
*
|
||||||
|
* @param file 需要识别的图片文件
|
||||||
|
* @return 识别结果,返回图片中的文字内容
|
||||||
|
* @throws TesseractException 如果 OCR 识别过程中发生异常,抛出此异常
|
||||||
|
*/
|
||||||
|
public static String performOCR(File file) throws TesseractException {
|
||||||
|
ITesseract tesseract = THREAD_LOCAL_TESSERACT.get();
|
||||||
|
return tesseract.doOCR(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 执行 OCR 识别操作
|
||||||
|
*
|
||||||
|
* @param file 需要识别的图片文件
|
||||||
|
* @param language 需要使用的语言(如 "chi_sim" 或 "eng")
|
||||||
|
* @return 识别结果,返回图片中的文字内容
|
||||||
|
* @throws TesseractException 如果 OCR 识别过程中发生异常,抛出此异常
|
||||||
|
*/
|
||||||
|
public static String performOCR(File file, String language) throws TesseractException {
|
||||||
|
ITesseract tesseract = THREAD_LOCAL_TESSERACT.get();
|
||||||
|
// 设置线程对应的 OCR 语言
|
||||||
|
tesseract.setLanguage(language);
|
||||||
|
return tesseract.doOCR(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user