diff --git a/pom.xml b/pom.xml
index 8186079bc..3175958d2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -40,6 +40,9 @@
 2.7.0
+
+ 5.13.0
+
 2.28.22
 0.31.3
@@ -316,6 +319,13 @@
 ${ip2region.version}
+
+
+ net.sourceforge.tess4j
+ tess4j
+ ${tesseract.version}
+
+
 commons-io
 commons-io
diff --git a/ruoyi-admin/src/main/resources/traineddata/chi_sim.traineddata b/ruoyi-admin/src/main/resources/traineddata/chi_sim.traineddata
new file mode 100644
index 000000000..eeb66cfbd
Binary files /dev/null and b/ruoyi-admin/src/main/resources/traineddata/chi_sim.traineddata differ
diff --git a/ruoyi-common/ruoyi-common-core/pom.xml b/ruoyi-common/ruoyi-common-core/pom.xml
index ad37e90db..197c6c260 100644
--- a/ruoyi-common/ruoyi-common-core/pom.xml
+++ b/ruoyi-common/ruoyi-common-core/pom.xml
@@ -94,6 +94,12 @@
 ip2region
+
+
+ net.sourceforge.tess4j
+ tess4j
+
+
diff --git a/ruoyi-common/ruoyi-common-core/src/main/java/org/dromara/common/core/utils/ocr/TesseractUtils.java b/ruoyi-common/ruoyi-common-core/src/main/java/org/dromara/common/core/utils/ocr/TesseractUtils.java
new file mode 100644
index 000000000..1faab9acb
--- /dev/null
+++ b/ruoyi-common/ruoyi-common-core/src/main/java/org/dromara/common/core/utils/ocr/TesseractUtils.java
@@ -0,0 +1,106 @@
+package org.dromara.common.core.utils.ocr;
+
+import cn.hutool.core.io.FileUtil;
+import lombok.extern.slf4j.Slf4j;
+import net.sourceforge.tess4j.ITesseract;
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.TesseractException;
+
+import java.io.File;
+import java.util.Objects;
+
+/**
+ * Tesseract OCR utility class providing text recognition for image files.
+ *
+ * <p>Each thread works with its own {@link ITesseract} instance held in a {@link ThreadLocal},
+ * so engine instances are never shared between threads. The language configured on a thread's
+ * instance is sticky: it stays in effect for later calls on that thread (including reused pool
+ * threads) until it is changed again.
+ */
+@Slf4j
+public class TesseractUtils {
+    /** Language used when the caller does not choose one: Simplified Chinese. */
+    private static final String DEFAULT_LANGUAGE = "chi_sim";
+
+    /**
+     * Directory handed to Tesseract as its data path: {@code traineddata} under the temp directory.
+     * This class does not populate the directory itself; the {@code *.traineddata} files for every
+     * requested language are presumably placed there elsewhere (TODO confirm).
+     */
+    private static final String DATAPATH = FileUtil.getTmpDir() + FileUtil.FILE_SEPARATOR + "traineddata";
+
+    /**
+     * Per-thread OCR engine. The explicit type argument is required: with a raw {@code ThreadLocal},
+     * {@code get()} returns {@code Object} and the assignments to {@code ITesseract} below do not compile.
+     */
+    private static final ThreadLocal<ITesseract> THREAD_LOCAL_TESSERACT = ThreadLocal.withInitial(() -> {
+        ITesseract tesseract = new Tesseract();
+        // Point the engine at the trained-data directory.
+        tesseract.setDatapath(DATAPATH);
+        // Start every thread with the default language.
+        tesseract.setLanguage(DEFAULT_LANGUAGE);
+        return tesseract;
+    });
+
+    /** Static utility class; not meant to be instantiated. */
+    private TesseractUtils() {
+    }
+
+    /**
+     * Sets the OCR language for the current thread.
+     *
+     * @param language language code, e.g. "chi_sim" (Simplified Chinese) or "eng" (English)
+     * @throws NullPointerException if {@code language} is {@code null}
+     */
+    public static void setLanguage(String language) {
+        Objects.requireNonNull(language, "language");
+        THREAD_LOCAL_TESSERACT.get().setLanguage(language);
+    }
+
+    /**
+     * Runs OCR on the given image using the default language ("chi_sim").
+     * The current thread's language is reset to the default as a side effect.
+     *
+     * @param file image file to recognize
+     * @return the text recognized in the image
+     * @throws TesseractException if the OCR engine fails while processing the file
+     * @throws NullPointerException if {@code file} is {@code null}
+     */
+    public static String performOCRWithDefaultLanguage(File file) throws TesseractException {
+        return performOCR(file, DEFAULT_LANGUAGE);
+    }
+
+    /**
+     * Runs OCR on the given image using whatever language is currently set for this thread
+     * (the default until {@link #setLanguage(String)} or {@link #performOCR(File, String)} changes it).
+     *
+     * @param file image file to recognize
+     * @return the text recognized in the image
+     * @throws TesseractException if the OCR engine fails while processing the file
+     * @throws NullPointerException if {@code file} is {@code null}
+     */
+    public static String performOCR(File file) throws TesseractException {
+        Objects.requireNonNull(file, "file");
+        return THREAD_LOCAL_TESSERACT.get().doOCR(file);
+    }
+
+    /**
+     * Runs OCR on the given image using the given language.
+     * The language remains set on the current thread's engine after the call.
+     *
+     * @param file     image file to recognize
+     * @param language language code to use, e.g. "chi_sim" or "eng"
+     * @return the text recognized in the image
+     * @throws TesseractException if the OCR engine fails while processing the file
+     * @throws NullPointerException if {@code file} or {@code language} is {@code null}
+     */
+    public static String performOCR(File file, String language) throws TesseractException {
+        Objects.requireNonNull(file, "file");
+        Objects.requireNonNull(language, "language");
+        ITesseract tesseract = THREAD_LOCAL_TESSERACT.get();
+        // Switch this thread's engine to the requested language before recognizing.
+        tesseract.setLanguage(language);
+        return tesseract.doOCR(file);
+    }
+
+}