diff --git a/pom.xml b/pom.xml
index d9034bd..0710c2a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,13 @@
             <artifactId>spring-boot-starter-test</artifactId>
             <scope>test</scope>
         </dependency>
+
+        <!-- Tess4J: Java wrapper for the Tesseract OCR engine -->
+        <dependency>
+            <groupId>net.sourceforge.tess4j</groupId>
+            <artifactId>tess4j</artifactId>
+            <version>5.18.0</version>
+        </dependency>
     </dependencies>
 
     <build>
diff --git a/src/main/java/com/tem/bocai/config/TessConfig.java b/src/main/java/com/tem/bocai/config/TessConfig.java
new file mode 100644
index 0000000..f544dd1
--- /dev/null
+++ b/src/main/java/com/tem/bocai/config/TessConfig.java
@@ -0,0 +1,41 @@
+package com.tem.bocai.config;
+
+import net.sourceforge.tess4j.Tesseract;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Spring configuration that exposes a Tess4J {@link Tesseract} OCR engine as a bean.
+ */
+@Configuration
+public class TessConfig {
+
+    /**
+     * Directory holding the {@code *.traineddata} files. The default is relative to the
+     * working directory, so it only resolves when the application is started from the
+     * project root; set {@code tess4j.datapath} to an absolute directory otherwise.
+     */
+    @Value("${tess4j.datapath:src/main/resources/tessdata}")
+    private String dataPath;
+
+    /** Language pack name; {@code <language>.traineddata} is looked up in the data path. */
+    @Value("${tess4j.language:oci}")
+    private String language;
+
+    /**
+     * Builds the OCR engine from the configured language and data path.
+     *
+     * <p>NOTE(review): the bean is a singleton shared by every request; confirm that
+     * concurrent {@code doOCR} calls on one instance are safe, or serialize access.
+     *
+     * @return the configured {@link Tesseract} instance
+     */
+    @Bean
+    public Tesseract tesseract() {
+        Tesseract instance = new Tesseract();
+        instance.setLanguage(language);
+        instance.setDatapath(dataPath);
+        return instance;
+    }
+}
diff --git a/src/main/java/com/tem/bocai/controller/TestController.java b/src/main/java/com/tem/bocai/controller/TestController.java
new file mode 100644
index 0000000..1b1a7d9
--- /dev/null
+++ b/src/main/java/com/tem/bocai/controller/TestController.java
@@ -0,0 +1,57 @@
+package com.tem.bocai.controller;
+
+import com.tem.bocai.util.ImageOcrService;
+import net.sourceforge.tess4j.TesseractException;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.io.IOException;
+
+/**
+ * HTTP endpoints that run OCR on a classpath image or on an image fetched from a URL.
+ */
+@RestController
+public class TestController {
+
+    private final ImageOcrService imageOcrService;
+
+    @Autowired
+    public TestController(ImageOcrService imageOcrService) {
+        this.imageOcrService = imageOcrService;
+    }
+
+    /**
+     * Runs OCR on an image loaded from the classpath.
+     *
+     * @param imagePath classpath-relative image name; {@code b.jpg} is used when omitted
+     * @return the recognized text
+     * @throws IOException if the image cannot be found or decoded
+     * @throws TesseractException if OCR fails
+     */
+    @GetMapping("/ocr/local")
+    public ResponseEntity<String> ocrLocalImage(
+            @RequestParam(name = "imagePath", defaultValue = "b.jpg") String imagePath)
+            throws IOException, TesseractException {
+        String result = imageOcrService.ocrLocalImage(imagePath);
+        return ResponseEntity.ok(result);
+    }
+
+    /**
+     * Runs OCR on an image downloaded from the given URL.
+     *
+     * @param imageUrl absolute http or https URL of the image
+     * @return the recognized text
+     * @throws IOException if the image cannot be downloaded or decoded
+     * @throws TesseractException if OCR fails
+     */
+    @GetMapping("/ocr/remote")
+    public ResponseEntity<String> ocrRemoteImage(
+            @RequestParam(name = "imageUrl") String imageUrl)
+            throws IOException, TesseractException {
+        String result = imageOcrService.ocrRemoteImage(imageUrl);
+        return ResponseEntity.ok(result);
+    }
+}
diff --git a/src/main/java/com/tem/bocai/util/ImageOcrService.java b/src/main/java/com/tem/bocai/util/ImageOcrService.java
new file mode 100644
index 0000000..909fced
--- /dev/null
+++ b/src/main/java/com/tem/bocai/util/ImageOcrService.java
@@ -0,0 +1,117 @@
+package com.tem.bocai.util;
+
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.TesseractException;
+import org.apache.commons.io.IOUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.core.io.Resource;
+import org.springframework.core.io.ResourceLoader;
+import org.springframework.stereotype.Service;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLConnection;
+
+/**
+ * Runs Tesseract OCR on images loaded from the classpath or downloaded over HTTP(S).
+ */
+@Service
+public class ImageOcrService {
+
+    /** Upper bound, in milliseconds, for establishing the connection to a remote image. */
+    private static final int CONNECT_TIMEOUT_MS = 5_000;
+
+    /** Upper bound, in milliseconds, for each blocking read while downloading a remote image. */
+    private static final int READ_TIMEOUT_MS = 15_000;
+
+    private final Tesseract tesseract;
+    private final ResourceLoader resourceLoader;
+
+    @Autowired
+    public ImageOcrService(Tesseract tesseract, ResourceLoader resourceLoader) {
+        this.tesseract = tesseract;
+        this.resourceLoader = resourceLoader;
+    }
+
+    /**
+     * Reads an image from the classpath and runs OCR on it.
+     *
+     * @param imagePath classpath-relative path of the image
+     * @return the recognized text
+     * @throws IllegalArgumentException if {@code imagePath} is null or blank
+     * @throws IOException if the resource is missing or is not a decodable image
+     * @throws TesseractException if OCR fails
+     */
+    public String ocrLocalImage(String imagePath) throws IOException, TesseractException {
+        if (imagePath == null || imagePath.trim().isEmpty()) {
+            throw new IllegalArgumentException("imagePath must not be blank");
+        }
+        // try-with-resources closes the stream even when decoding or OCR throws.
+        try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream(imagePath)) {
+            if (inputStream == null) {
+                throw new FileNotFoundException("Image not found on classpath: " + imagePath);
+            }
+            return tesseract.doOCR(decode(inputStream, imagePath));
+        }
+    }
+
+    /**
+     * Downloads an image from a remote URL and runs OCR on it.
+     *
+     * <p>Only {@code http} and {@code https} URLs are accepted. NOTE(review): the target host is
+     * not restricted, so callers exposing this to untrusted input should add an allow-list.
+     *
+     * @param imageUrl absolute http or https URL of the image
+     * @return the recognized text
+     * @throws IllegalArgumentException if {@code imageUrl} is blank, malformed, or not http(s)
+     * @throws IOException if the download fails or the payload is not a decodable image
+     * @throws TesseractException if OCR fails
+     */
+    public String ocrRemoteImage(String imageUrl) throws IOException, TesseractException {
+        URI uri = parseHttpUri(imageUrl);
+        URLConnection connection = uri.toURL().openConnection();
+        connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
+        connection.setReadTimeout(READ_TIMEOUT_MS);
+
+        // IOUtils.toByteArray(String) only encodes the string itself, so the bytes must be
+        // read from the connection's stream to obtain the actual image.
+        byte[] imageData;
+        try (InputStream remoteStream = connection.getInputStream()) {
+            imageData = IOUtils.toByteArray(remoteStream);
+        }
+        return tesseract.doOCR(decode(new ByteArrayInputStream(imageData), imageUrl));
+    }
+
+    /** Decodes the stream into an image, failing clearly when no image reader accepts it. */
+    private static BufferedImage decode(InputStream in, String source) throws IOException {
+        BufferedImage image = ImageIO.read(in);
+        if (image == null) {
+            throw new IOException("Unsupported or corrupt image: " + source);
+        }
+        return image;
+    }
+
+    /** Parses {@code imageUrl} and rejects anything that is not an absolute http(s) URL. */
+    private static URI parseHttpUri(String imageUrl) {
+        if (imageUrl == null || imageUrl.trim().isEmpty()) {
+            throw new IllegalArgumentException("imageUrl must not be blank");
+        }
+        final URI uri;
+        try {
+            uri = new URI(imageUrl.trim());
+        } catch (URISyntaxException e) {
+            throw new IllegalArgumentException("Invalid image URL: " + imageUrl, e);
+        }
+        String scheme = uri.getScheme();
+        if (!"http".equalsIgnoreCase(scheme) && !"https".equalsIgnoreCase(scheme)) {
+            throw new IllegalArgumentException("Only http and https URLs are supported: " + imageUrl);
+        }
+        return uri;
+    }
+}
diff --git a/src/main/resources/a.jpg b/src/main/resources/a.jpg
new file mode 100644
index 0000000..278c8ca
Binary files /dev/null and b/src/main/resources/a.jpg differ
diff --git a/src/main/resources/b.jpg b/src/main/resources/b.jpg
new file mode 100644
index 0000000..3bd1871
Binary files /dev/null and b/src/main/resources/b.jpg differ
diff --git a/src/main/resources/tessdata/eng.traineddata b/src/main/resources/tessdata/eng.traineddata
new file mode 100644
index 0000000..f4744c2
Binary files /dev/null and b/src/main/resources/tessdata/eng.traineddata differ
diff --git a/src/main/resources/tessdata/oci.traineddata b/src/main/resources/tessdata/oci.traineddata
new file mode 100644
index 0000000..614c489
Binary files /dev/null and b/src/main/resources/tessdata/oci.traineddata differ