VS+Tesseract中英文识别代码

使用VS+Tesseract来实现中文和英文的识别:

#include<tesseract/baseapi.h>
#include<opencv2/opencv.hpp>
#include<iostream>
#include<Windows.h>
using namespace cv;
using namespace std;
using namespace tesseract;

#pragma comment(lib, "D:/Program Files (x86)/Tesseract-OCR/lib/libtesseract302.lib") 
#pragma comment(lib, "D:/Program Files (x86)/Tesseract-OCR/lib/liblept168.lib")

char* U2G(const char* utf8)
{
    int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    wchar_t* wstr = new wchar_t[len + 1];
    memset(wstr, 0, len + 1);
    MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
    len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
    char* str = new char[len + 1];
    memset(str, 0, len + 1);
    WideCharToMultiByte(CP_ACP, 0, wstr, -1, str, len, NULL, NULL);
    if (wstr) delete[] wstr;
    return str;
}

char* G2U(const char* gb2312)
{
    int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    wchar_t* wstr = new wchar_t[len + 1];
    memset(wstr, 0, len + 1);
    MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len);
    len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
    char* str = new char[len + 1];
    memset(str, 0, len + 1);
    WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL);
    if (wstr) delete[] wstr;
    return str;
}

void main(){
    Mat src = imread("D://a//imgch.jpg",0);
    TessBaseAPI orc;
    char *langue = "eng+chi_sim";
    //char *langue1 = "";
    orc.Init(NULL,langue,OEM_TESSERACT_ONLY);
    orc.SetVariable("tessedit_write_images", "1");
    orc.SetPageSegMode(PSM_SINGLE_BLOCK);
    orc.SetImage(src.data, src.cols, src.rows, src.elemSize() ,src.cols);
    char *text = orc.GetUTF8Text();//.GetUTF8Text();
    String s = U2G(text);
    cout << s << endl;
    cout << orc.MeanTextConf() << endl << endl;
    imshow("src",src);
    waitKey(0);

}

测试图片:


测试图片

测试结果:


测试结果
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容