Google Tesseract-OCR 文字识别

作者: yasuion | 来源:发表于2019-09-27 18:12 被阅读0次

地址：https://github.com/rmtheis/tess-two

字库下载地址：https://github.com/tesseract-ocr/tesseract/wiki/Data-Files

1：在 app 模块的 build.gradle 中添加依赖：

dependencies {

implementation 'com.rmtheis:tess-two:9.0.0'

}

2：将下载的字库文件（.traineddata）复制到项目的 assets 目录中

3：在 Activity 中初始化 TessBaseAPI，并在子线程中执行识别：

/* mDataPath is the on-device directory where the trained-data (language) files are stored:
   "<external storage root>/tessdata/". Note that the value already ends with a "/". */

private String mDataPath = Environment.getExternalStorageDirectory().getAbsolutePath() +"/tessdata/";

// Image being recognized; assigned on the recognition thread and displayed by mHandler.
private Bitmap bitmap;

// View that displays the recognized text (set from mHandler).
private TextView textView;

private Handler mHandler=new Handler(){

@Override

public void handleMessage(Message msg) {

super.handleMessage(msg);

switch (msg.what){

case 0:

textView.setText(OCRresult);

imageView.setImageBitmap(bitmap);

break;

}

};

// Make sure the trained-data directory exists before anything is copied into it.
File parentfile = new File(mDataPath);
if (!parentfile.exists() && !parentfile.mkdirs()) {
    Log.e("test", "Unable to create trained-data directory: " + mDataPath);
}

copyFiles(); // copy the trained-data files from assets onto the device

String lang = "chi_sim+eng"; // recognize Simplified Chinese + English

final TessBaseAPI mTess = new TessBaseAPI();

// init() takes the PARENT directory of "tessdata/", i.e. the parent of mDataPath.
// Its boolean result tells whether the engine is usable, so it must be checked.
if (!mTess.init(Environment.getExternalStorageDirectory().getAbsolutePath(), lang)) {
    Log.e("test", "TessBaseAPI.init failed for language: " + lang);
} else {
    final long starttime = System.currentTimeMillis(); // recognition start time

    // Recognition is slow (seconds), so it runs on a worker thread.
    new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                // Load the test image.
                bitmap = BitmapFactory.decodeResource(MainActivity.this.getResources(), R.drawable.text123123);

                mTess.setImage(bitmap);
                OCRresult = mTess.getUTF8Text(); // recognized text

                long endtime = System.currentTimeMillis(); // recognition end time

                // Tell the handler to display the result.
                mHandler.sendEmptyMessage(0);

                Log.e("test", "检测结果：" + OCRresult);
                // Floating-point division so sub-second runs are not reported as "0 s".
                Log.e("test", (endtime - starttime) / 1000.0 + " s");
            } finally {
                // Release the native engine; it is not reused after this single recognition.
                mTess.end();
            }
        }
    }).start();
}

// 工具类

private void copyFiles() {

String[] datafilepaths =new String[]{mDataPath +"/chi_sim.traineddata",mDataPath +"/eng.traineddata",mDataPath +"/chi_sim_vert.traineddata"};// 拷两个字库过去

for (String datafilepath : datafilepaths) {

copyFile(datafilepath);

}

private void copyFile(String datafilepath) {

try {

String filepath = datafilepath;

String[] filesegment = filepath.split(File.separator);

String filename = filesegment[(filesegment.length -1)];// 获取字库文件名

AssetManager assetManager = getAssets();

InputStream instream = assetManager.open(filename);// 打开字库文件

OutputStream outstream =new FileOutputStream(filepath);

byte[] buffer =new byte[1024];

int read;

while ((read = instream.read(buffer)) != -1) {

outstream.write(buffer,0, read);

}

outstream.flush();

outstream.close();

instream.close();

File file =new File(filepath);

if (!file.exists()) {

throw new FileNotFoundException();

}

}catch (FileNotFoundException e) {

e.printStackTrace();

}catch (IOException e) {

e.printStackTrace();

}

// 效果1 复杂图片识别时间 14 s 效果不佳

// 效果2 简单图片识别时间 1 s 识别效果好

// 百度有免费的OCR 效果很好。。。

网友评论

本文标题：Google Tesseract-OCR 文字识别

本文链接：https://www.haomeiwen.com/subject/ezeauctx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

Google Tesseract-OCR 文字识别

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读