Java OCR tess4j 图片识别简单示例

2019年11月30日 184次阅读

1、项目中下载tess4j依赖包：

<!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
		<dependency>
		    <groupId>net.sourceforge.tess4j</groupId>
		    <artifactId>tess4j</artifactId>
		    <version>4.3.0</version>
		</dependency>

也可以去官网单独下载： https://sourceforge.net/projects/tess4j/

2、下载相关的语言包 tessdata ：https://github.com/tesseract-ocr/tessdata

放于项目中或其它位置

3、编码开始：

import java.io.File;
import java.util.HashMap;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

/**
 * Utility methods for running Tesseract OCR (Optical Character Recognition)
 * from Java through the tess4j wrapper.
 *
 * <p>Every engine created here is configured with the data path
 * {@code "tessdata"} (a relative path; the original author keeps the language
 * data under the project root) and the language {@code "chi_sim"}
 * (Simplified Chinese).
 *
 * @author wind
 */
public class Tess4jUtils {

    /** Relative path of the directory holding the Tesseract language data. */
    private static final String TESSDATA_PATH = "tessdata";

    /** Tesseract language code used for recognition (Simplified Chinese). */
    private static final String LANGUAGE = "chi_sim";

    /**
     * Creates a Tesseract engine configured with the shared data path and language.
     *
     * @return a freshly configured engine
     */
    private static ITesseract newInstance() {
        ITesseract instance = new Tesseract(); // JNA Interface Mapping
        instance.setDatapath(TESSDATA_PATH);
        instance.setLanguage(LANGUAGE);
        return instance;
    }

    /**
     * Runs OCR on a single image file.
     *
     * @param filePath path of the image file to recognise
     * @return the recognised text, or {@code null} if Tesseract reported an error
     *         (the error is written to standard error)
     */
    public static String File_Tess4j(String filePath) {
        File imageFile = new File(filePath);
        ITesseract instance = newInstance();
        try {
            return instance.doOCR(imageFile);
        } catch (TesseractException e) {
            System.err.println("OCR failed for " + filePath + ": " + e.getMessage());
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Runs OCR on every regular file directly inside a folder.
     *
     * <p>Sub-directories are skipped. A file that Tesseract cannot process is
     * reported on standard error and left out of the result; the remaining
     * files are still processed.
     *
     * @param folderPath path of the folder containing the images
     * @return a map from file name to recognised text; empty (never {@code null})
     *         when the folder cannot be listed or nothing was recognised
     */
    public static HashMap<String, Object> Folder_Tess4j(String folderPath) {
        HashMap<String, Object> map = new HashMap<>();
        File folder = new File(folderPath);

        // listFiles() returns null when the path is not a directory or an I/O error occurs.
        File[] files = folder.listFiles();
        if (files == null) {
            System.err.println("Cannot list files in " + folderPath
                    + " (not a directory or not readable)");
            return map;
        }

        ITesseract instance = newInstance();
        for (File file : files) {
            if (!file.isFile()) {
                continue; // only regular files can be images
            }
            // Handle failures per file so one bad image does not abort the whole batch.
            try {
                map.put(file.getName(), instance.doOCR(file));
            } catch (TesseractException e) {
                System.err.println("OCR failed for " + file.getPath() + ": " + e.getMessage());
            }
        }
        return map;
    }

    /**
     * Demo entry point: recognises one image and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "g://pic/1.jpg";
        String folderPath = "g:/pic";
        String content = Tess4jUtils.File_Tess4j(filePath);
        System.out.println(content);
        // To process a whole folder instead:
        // HashMap<String, Object> map = Tess4jUtils.Folder_Tess4j(folderPath);
        // System.out.println(map);
    }

}

识别英文和数字较好点，中文稍差点