美文网首页
Java基于百度API的图片文字识别

Java基于百度API的图片文字识别

作者: GALAace | 来源:发表于2018-10-03 01:16 被阅读0次
    • 准备工作:

    使用之前需要获取对应的项目API_KEY,SECRET_KEY,这些参数在使用API的时候必须用到,用于生成access_token.
    如何获取这些参数?
    http://ai.baidu.com/tech/ocr/general
    登录后创建一个应用

    百度文字识别后台
    点击创建应用,填入对应的信息.点击应用列表就可以得到API_KEY,SECRET_KEY了
    API_KEY和SECRET_KEY
    下载Java版SDK,并将其中的jar包引入项目
    选择Java SDK
    • 获取access_token
    package com.baidu.ai.aip.auth;
    
    import org.json.JSONObject;
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.util.List;
    import java.util.Map;
    
    /**
     * Obtains OAuth access tokens from the Baidu AI open platform.
     */
    public class AuthService {

        /**
         * Requests an access token using the credentials hard-coded below.
         *
         * <p>Example of the JSON returned by the token endpoint:
         * <pre>
         * {
         * "access_token": "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-1234567",
         * "expires_in": 2592000
         * }
         * </pre>
         *
         * @return the {@code access_token} value, or {@code null} if it could not be obtained
         */
        public static String getAuth() {
            // API Key from the Baidu Cloud console; replace with your own
            String clientId = "百度云应用的AK";
            // Secret Key from the Baidu Cloud console; replace with your own
            String clientSecret = "百度云应用的SK";
            return getAuth(clientId, clientSecret);
        }

        /**
         * Requests an API access token.
         *
         * <p>The token is only valid for a limited time (see {@code expires_in} in the
         * response); callers are responsible for fetching a new one once it expires.
         *
         * @param ak API Key obtained from the Baidu Cloud console
         * @param sk Secret Key obtained from the Baidu Cloud console
         * @return the access token, e.g.
         *         "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-1234567",
         *         or {@code null} when the request fails or the response carries no
         *         {@code access_token} field (the failure is reported on stderr)
         */
        public static String getAuth(String ak, String sk) {
            // Token endpoint
            String authHost = "https://aip.baidubce.com/oauth/2.0/token?";
            String getAccessTokenUrl = authHost
                    // 1. grant_type is a fixed parameter
                    + "grant_type=client_credentials"
                    // 2. API Key
                    + "&client_id=" + ak
                    // 3. Secret Key
                    + "&client_secret=" + sk;
            HttpURLConnection connection = null;
            try {
                URL realUrl = new URL(getAccessTokenUrl);
                connection = (HttpURLConnection) realUrl.openConnection();
                connection.setRequestMethod("GET");
                connection.connect();
                // Dump the response headers for diagnostics
                Map<String, List<String>> headers = connection.getHeaderFields();
                for (Map.Entry<String, List<String>> header : headers.entrySet()) {
                    System.err.println(header.getKey() + "--->" + header.getValue());
                }
                // Read the body with an explicit charset; try-with-resources closes the
                // reader (and the underlying stream) even when reading fails.
                StringBuilder result = new StringBuilder();
                try (BufferedReader in = new BufferedReader(
                        new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                    String line;
                    while ((line = in.readLine()) != null) {
                        result.append(line);
                    }
                }
                // The raw response is deliberately not printed: it contains the token,
                // which is a credential and should not end up in logs.
                JSONObject jsonObject = new JSONObject(result.toString());
                return jsonObject.getString("access_token");
            } catch (Exception e) {
                System.err.println("获取token失败!");
                e.printStackTrace(System.err);
            } finally {
                if (connection != null) {
                    connection.disconnect();
                }
            }
            return null;
        }

    }
    
    • 编写工具类:将图片转换为Base64字符串,再进行URL编码(urlencode)
    package com.baidu.ai.aip.auth;
    
    // NOTE(review): sun.misc.BASE64Encoder is a JDK-internal class that is absent from
    // modern JDKs; java.util.Base64 is the supported replacement.
    import sun.misc.BASE64Encoder;
    
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.UncheckedIOException;
    import java.net.URLEncoder;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.Base64;
    
    /**
     * Converts an image file to a Base64 string and then URL-encodes the result,
     * ready to be used as a form parameter value.
     */

    public class BaseImg64 {
        /**
         * Reads a local image and returns its content Base64-encoded and then URL-encoded.
         *
         * <p>The whole file is read in one call and encoded with the standard
         * {@link Base64} encoder, which emits no line separators, so the result is a
         * single unbroken token regardless of file size.
         *
         * @param imgPath path of the image file to read
         * @return the URL-encoded Base64 text; an empty string for an empty file
         * @throws UncheckedIOException if the file cannot be read (the original
         *         {@link IOException} is kept as the cause)
         */
        public static String getImageStrFromPath(String imgPath) {
            try {
                // Read the complete file; a single InputStream.read() is not guaranteed
                // to fill the buffer, and available() is only an estimate.
                byte[] data = Files.readAllBytes(Paths.get(imgPath));
                String base64 = Base64.getEncoder().encodeToString(data);
                // Base64 output is ASCII; naming the charset avoids the deprecated
                // platform-default overload.
                return URLEncoder.encode(base64, "UTF-8");
            } catch (IOException e) {
                // Fail loudly with the cause instead of continuing with a null buffer.
                throw new UncheckedIOException("Failed to read image: " + imgPath, e);
            }
        }
    }
    
    • 编写调用百度API接口的方法,获取识别结果
    package com.baidu.ai.aip.auth;
    
    import org.apache.http.HttpResponse;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.entity.StringEntity;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.util.EntityUtils;
    
    import java.io.File;
    import java.io.IOException;
    import java.net.URI;
    import java.net.URISyntaxException;
    
    /**
     * Recognises text in images by calling the Baidu general_basic OCR endpoint.
     */

    public class Check {
        // NOTE(review): the access token is fetched once, when this class is loaded.
        // If AuthService.getAuth() returns null the URL ends in "access_token=null",
        // and an expired token is never refreshed; consider fetching it lazily.
        private static final String POST_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=" + AuthService.getAuth();

        /**
         * Recognises the text in a local image file.
         *
         * @param path path of the image file
         * @return the JSON response body, or {@code null} if the service did not answer
         *         with HTTP 200
         * @throws java.io.FileNotFoundException if {@code path} is not an existing file
         * @throws IOException if the request fails
         * @throws URISyntaxException if the endpoint URL is malformed
         */
        public static String checkFile(String path) throws URISyntaxException, IOException {
            File file = new File(path);
            if (!file.isFile()) {
                // An IOException subtype, so callers handling IOException (such as main)
                // see a missing file as an ordinary I/O failure.
                throw new java.io.FileNotFoundException("图片不存在: " + path);
            }
            String image = BaseImg64.getImageStrFromPath(path);
            String param = "image=" + image;
            return post(param);
        }

        /**
         * Recognises the text in an image identified by its URL.
         *
         * @param url the plain (not yet URL-encoded) address of the image
         * @return the JSON response body, or {@code null} if the service did not answer
         *         with HTTP 200
         * @throws IOException if the request fails
         * @throws URISyntaxException if the endpoint URL is malformed
         */
        public static String checkUrl(String url) throws IOException, URISyntaxException {
            // The value travels in a form-urlencoded body, so characters such as
            // '&', '=' and '?' inside the image URL must be escaped.
            String param = "url=" + java.net.URLEncoder.encode(url, "UTF-8");
            return post(param);
        }

        /**
         * Sends the form-encoded parameter ({@code url=...} or {@code image=...}) to the
         * OCR endpoint.
         *
         * @param param the already form-encoded request body
         * @return the response body on HTTP 200, otherwise {@code null}
         */
        private static String post(String param) throws URISyntaxException, IOException {
            HttpClient httpClient = new DefaultHttpClient();
            try {
                HttpPost post = new HttpPost(new URI(POST_URL));
                // The body is one long form-encoded string, so the content type must be
                // application/x-www-form-urlencoded.
                post.setHeader("Content-Type", "application/x-www-form-urlencoded");
                post.setEntity(new StringEntity(param));
                HttpResponse response = httpClient.execute(post);
                System.out.println(response.toString());
                if (response.getStatusLine().getStatusCode() != 200
                        || response.getEntity() == null) {
                    return null;
                }
                // Read the JSON text returned by the server; read errors propagate as
                // IOException rather than being turned into a silent null.
                String str = EntityUtils.toString(response.getEntity());
                System.out.println(str);
                return str;
            } finally {
                // Release the pooled connections held by this one-shot client.
                httpClient.getConnectionManager().shutdown();
            }
        }

        /**
         * Demo entry point: runs OCR on {@code test.jpg} and prints the elapsed time in
         * whole seconds.
         */
        public static void main(String[] args) {
            String path = "test.jpg";
            try {
                long now = System.currentTimeMillis();
                checkFile(path);
                System.out.println("耗时:" + (System.currentTimeMillis() - now) / 1000 + "s");
            } catch (URISyntaxException | IOException e) {
                e.printStackTrace();
            }
        }
    }
    
    • 运行
    运行结果

    控制台打印:

    HTTP/1.1 200 OK [.....] org.apache.http.conn.BasicManagedEntity@ca263c2
    {"log_id": 6831958521466827273, "words_result_num": 22, "words_result": 
    [{"words": "BadU百度"}, 
    {"words": "百度一下"},
    {"words": "只我的关注推荐"}, 
    {"words": "导航"}, 
    {"words": "公以下信息根据您的兴趣推荐"}, 
    {"words": "实时热点"}, {"words": "心换"}, 
    {"words": "孙茜回应网友谩骂捆蟹大妈月入万元"}, 
    {"words": "7要开会明,@格溢学长"}, 
    {"words": "杨主席是你们直接@的?”好大的官威"},
    {"words": "平文涛欠一个道歉国庆上高速卖炒粉"}, 
    {"words": "啊"}, {"words": "搜狐新闻10-021207"},
    {"words": "安倍改组内阁名单摩托车违法闯高速"}, 
    {"words": "EDG开门红"}, 
    {"words": "国有景区门票降价"}, 
    {"words": "警方重新调查罗中国女排vS加"}, 
    {"words": "一样的国庆节,不一样的守护"}, 
    {"words": "女排3-0加拿大"}, 
    {"words": "国防部回应美舰"}, 
    {"words": "德普女儿恋情曝光葛晨虹教授去世"}, 
    {"words": "上海南京路的武警五周杀人案赔偿"}]}
    耗时:1s
    
    test.jpg

    对比原图片识别率还是很高的,耗时也很短.
    我这里只测试了百度的通用识别,如果想提高识别精度,还可以使用高精度识别.

    相关文章

      网友评论

          本文标题:Java基于百度API的图片文字识别

          本文链接:https://www.haomeiwen.com/subject/rptzoftx.html