1、项目中下载tess4j依赖包:
<!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>4.3.0</version>
</dependency>
也可以去官网单独下载: https://sourceforge.net/projects/tess4j/
2、下载相关的语言包 tessdata :https://github.com/tesseract-ocr/tessdata
放于项目中或其它位置
3、编码开始:
import java.io.File;
import java.util.HashMap;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
/**
 * Utility class for OCR (Optical Character Recognition) through Tess4J,
 * the Java wrapper for Tesseract.
 *
 * @author wind
 */
public class Tess4jUtils {

    /** Language-data path; a relative path, expected under the project root. */
    private static final String DATA_PATH = "tessdata";

    /** Tesseract language code used for recognition (simplified Chinese). */
    private static final String LANGUAGE = "chi_sim";

    /**
     * Builds a Tesseract engine configured with the shared data path and language.
     *
     * @return a newly created, configured engine
     */
    private static ITesseract newEngine() {
        ITesseract engine = new Tesseract(); // JNA Interface Mapping
        engine.setDatapath(DATA_PATH);
        engine.setLanguage(LANGUAGE);
        return engine;
    }

    /**
     * Runs OCR on a single image file.
     *
     * @param filePath path of the image file to recognize
     * @return the recognized text, or {@code null} if recognition failed
     *         (the failure is printed to standard error)
     */
    public static String File_Tess4j(String filePath) {
        File imageFile = new File(filePath);
        try {
            return newEngine().doOCR(imageFile);
        } catch (TesseractException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Runs OCR on every regular file directly inside a folder.
     *
     * <p>Sub-directories are skipped. A file that cannot be recognized is
     * reported on standard error and left out of the result, so one bad
     * file does not discard the text already recognized from the others.
     *
     * @param folderPath path of the folder containing the images
     * @return a map from file name to recognized text; empty when the path
     *         is not a readable directory or contains no recognizable file
     */
    public static HashMap<String, Object> Folder_Tess4j(String folderPath) {
        HashMap<String, Object> map = new HashMap<>();
        File folder = new File(folderPath);

        // listFiles() returns null when the path is not a directory or an
        // I/O error occurs; iterating over it would throw a NullPointerException.
        File[] files = folder.listFiles();
        if (files == null) {
            System.err.println("Not a readable directory: " + folderPath);
            return map;
        }

        ITesseract instance = newEngine();
        for (File file : files) {
            if (!file.isFile()) {
                continue; // only regular files can be fed to the OCR engine
            }
            try {
                map.put(file.getName(), instance.doOCR(file));
            } catch (TesseractException e) {
                // Report the failure and keep going with the remaining files.
                System.err.println("OCR failed for " + file.getPath());
                e.printStackTrace();
            }
        }
        return map;
    }

    /**
     * Demo entry point: recognizes one sample image and prints the text.
     * To process a whole folder instead, call
     * {@code Tess4jUtils.Folder_Tess4j("g:/pic")} and print the returned map.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "g://pic/1.jpg";
        String content = Tess4jUtils.File_Tess4j(filePath);
        System.out.println(content);
    }
}
识别效果:英文和数字较好,中文稍差。