添加项目文件。

master
UnknownObject 2 months ago
parent f04e32f737
commit 78be0daf80

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.6.33815.320
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "EasyOCR-CPP", "EasyOCR-CPP\EasyOCR-CPP.vcxproj", "{166232A4-C250-4AAE-9D41-F22ED3FB9585}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Debug|x64.ActiveCfg = Debug|x64
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Debug|x64.Build.0 = Debug|x64
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Debug|x86.ActiveCfg = Debug|Win32
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Debug|x86.Build.0 = Debug|Win32
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Release|x64.ActiveCfg = Release|x64
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Release|x64.Build.0 = Release|x64
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Release|x86.ActiveCfg = Release|Win32
{166232A4-C250-4AAE-9D41-F22ED3FB9585}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {E50EE780-F01F-41F9-8402-DA05ADECF9E4}
EndGlobalSection
EndGlobal

@ -0,0 +1,195 @@
/* Copyright (C) 2011 阙荣文
*
* ,.
* .
*
* : querw@sina.com
*/
// ATW.cpp: implementation of the character-encoding conversion helpers declared in ATW.h.
// by Ted.Que - Que's C++ Studio
// 2010-11-12
// Character encoding conversion
#include "ATW.h"
/// Converts a null-terminated wide string to a multi-byte string in the given code page.
///
/// @param pwszText  Null-terminated wide string; a null pointer yields "".
/// @param uCodePage Target code page (the wrappers in this file pass CP_ACP or CP_UTF8).
/// @return The converted text without its terminating null, or "" when the
///         required size cannot be computed or the conversion fails.
std::string __do_w_to_a_utf8(const wchar_t* pwszText, UINT uCodePage)
{
    // Null input is treated as an empty string.
    if (pwszText == nullptr) return "";

    // First pass: query the required buffer size. The input length is -1, so the
    // reported size includes the terminating null character.
    const int nNeedSize = WideCharToMultiByte(uCodePage, 0, pwszText, -1, nullptr, 0, nullptr, nullptr);
    if (nNeedSize <= 0) return "";

    // Second pass: convert directly into a std::string. The buffer is released
    // automatically on every path; the former new[]/delete[] pair leaked when
    // the string assignment threw.
    std::string strRet(static_cast<std::string::size_type>(nNeedSize), '\0');
    const int nWritten = WideCharToMultiByte(uCodePage, 0, pwszText, -1, &strRet[0], nNeedSize, nullptr, nullptr);
    if (nWritten <= 0) return "";

    // Keep everything up to (not including) the first null, exactly as assigning
    // from a C string would.
    strRet.resize(std::char_traits<char>::length(strRet.c_str()));
    return strRet;
}
/// Converts a null-terminated multi-byte string in the given code page to a wide string.
///
/// @param pszText   Null-terminated multi-byte string; a null pointer yields L"".
/// @param uCodePage Source code page (the wrappers in this file pass CP_ACP or CP_UTF8).
/// @return The converted text without its terminating null, or L"" when the
///         required size cannot be computed or the conversion fails.
std::wstring __do_a_utf8_to_w(const char* pszText, UINT uCodePage)
{
    // Null input is treated as an empty string.
    if (pszText == nullptr) return L"";

    // First pass: query the required size in wide characters (includes the
    // terminating null because the input length is -1).
    const int nNeedSize = MultiByteToWideChar(uCodePage, 0, pszText, -1, nullptr, 0);
    if (nNeedSize <= 0) return L"";

    // Second pass: convert directly into a std::wstring so no manual
    // new[]/delete[] bookkeeping (and no leak on exception) is needed.
    std::wstring strRet(static_cast<std::wstring::size_type>(nNeedSize), L'\0');
    const int nWritten = MultiByteToWideChar(uCodePage, 0, pszText, -1, &strRet[0], nNeedSize);
    if (nWritten <= 0) return L"";

    // Keep everything up to (not including) the first null, exactly as assigning
    // from a C wide string would.
    strRet.resize(std::char_traits<wchar_t>::length(strRet.c_str()));
    return strRet;
}
std::string WtoA(const std::wstring& strText)
{
return __do_w_to_a_utf8(strText.c_str(), CP_ACP);
}
std::string WtoA(const wchar_t* pwszText)
{
return __do_w_to_a_utf8(pwszText, CP_ACP);
}
std::wstring AtoW(const std::string& strText)
{
return __do_a_utf8_to_w(strText.c_str(), CP_ACP);
}
std::wstring AtoW(const char* pszText)
{
return __do_a_utf8_to_w(pszText, CP_ACP);
}
std::string WtoUTF8(const std::wstring& strText)
{
return __do_w_to_a_utf8(strText.c_str(), CP_UTF8);
}
std::string WtoUTF8(const wchar_t* pwszText)
{
return __do_w_to_a_utf8(pwszText, CP_UTF8);
}
std::wstring UTF8toW(const std::string& strText)
{
return __do_a_utf8_to_w(strText.c_str(), CP_UTF8);
}
std::wstring UTF8toW(const char* pszText)
{
return __do_a_utf8_to_w(pszText, CP_UTF8);
}
// UTF-8 -> CP_ACP multi-byte text, going through a wide-string intermediate
// (std::string overload).
std::string UTF8toA(const std::string& src)
{
    const std::wstring wide = UTF8toW(src);
    return WtoA(wide);
}
// UTF-8 -> CP_ACP multi-byte text, going through a wide-string intermediate
// (C-string overload).
std::string UTF8toA(const char* src)
{
    const std::wstring wide = UTF8toW(src);
    return WtoA(wide);
}
// CP_ACP multi-byte text -> UTF-8, going through a wide-string intermediate
// (std::string overload).
std::string AtoUTF8(const std::string& src)
{
    const std::wstring wide = AtoW(src);
    return WtoUTF8(wide);
}
// CP_ACP multi-byte text -> UTF-8, going through a wide-string intermediate
// (C-string overload).
std::string AtoUTF8(const char* src)
{
    const std::wstring wide = AtoW(src);
    return WtoUTF8(wide);
}
/*
UTF-8 encoding uses at most 6 bytes per character:
1 byte : 0xxxxxxx
2 bytes: 110xxxxx 10xxxxxx
3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
5 bytes: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
6 bytes: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
// Checks whether a null-terminated byte string follows the UTF-8 lead-byte /
// continuation-byte pattern (sequences of 1 to 6 bytes are accepted).
// Only the byte pattern is checked; a return of 0 does not prove that the text
// was actually authored as UTF-8.
//
// Return values:
//    0 -> the input follows the UTF-8 byte pattern. Also returned for a null
//         pointer, for an empty string, and for any input that starts with the
//         BOM EF BB BF (such input is accepted without inspecting the rest).
//   -1 -> an illegal lead byte was found.
//   -2 -> an illegal continuation byte was found, or the string ended in the
//         middle of a multi-byte sequence.
int IsTextUTF8(const char* pszSrc)
{
    // Treat a null pointer like an empty string instead of dereferencing it.
    if (pszSrc == nullptr) return 0;

    // Work on unsigned bytes: with plain char the high-bit bytes could be
    // negative and the shifted comparisons below would not match.
    const unsigned char* pCur = reinterpret_cast<const unsigned char*>(pszSrc);

    // BOM (EF BB BF) present -> accept immediately. The comparisons short-circuit,
    // so no byte past the terminator is ever read.
    if (pCur[0] == 0xEF && pCur[1] == 0xBB && pCur[2] == 0xBF)
    {
        return 0;
    }

    int nPending = 0; // continuation bytes still expected for the current character
    for (; *pCur != 0; ++pCur)
    {
        const unsigned char b = *pCur;
        if (nPending == 0)
        {
            // Expecting a lead byte: its high bits tell how many bytes follow.
            if ((b >> 7) == 0) nPending = 0;            // 0xxxxxxx -> single byte
            else if ((b >> 5) == 0x06) nPending = 1;    // 110xxxxx -> 2-byte sequence
            else if ((b >> 4) == 0x0E) nPending = 2;    // 1110xxxx -> 3-byte sequence
            else if ((b >> 3) == 0x1E) nPending = 3;    // 11110xxx -> 4-byte sequence
            else if ((b >> 2) == 0x3E) nPending = 4;    // 111110xx -> 5-byte sequence
            else if ((b >> 1) == 0x7E) nPending = 5;    // 1111110x -> 6-byte sequence
            else return -1;                             // not a legal lead byte
        }
        else
        {
            // Expecting a continuation byte of the form 10xxxxxx.
            if ((b >> 6) != 0x02) return -2;
            --nPending;
        }
    }

    // The string must not stop in the middle of a multi-byte sequence.
    return (nPending == 0) ? 0 : -2;
}

@ -0,0 +1,69 @@
/* Copyright (C) 2011 阙荣文
*
* ,.
* .
*
* : querw@sina.com
*/
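/*
1. Converts between UCS/Unicode and UTF-8.
2. Converts between multi-byte (ANSI) text and Unicode.
Unicode text is held in std::wstring.
Non-Unicode text is held in std::string; it is ANSI text, e.g. GB2312.
UTF-8 text is also held in std::string, as a null-terminated UTF-8 byte string.
*/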
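/*
The ATL USES_CONVERSION macros (A2W, A2T, W2A, ...) allocate their buffers with alloca(), i.e. on the stack.
They are convenient, but stack space is limited (1 MB by default), so large conversions may overflow it.
The functions declared here can be used instead; they return std::string / std::wstring objects.
*/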
#pragma once
#include <string>
#if defined(_WIN32) || defined(WIN32)
#include "Windows.h"
#endif
// TCHAR-style aliases: the "T" conversions map onto the wide or the narrow
// functions depending on whether the build defines _UNICODE / UNICODE.
#if defined(_UNICODE) || defined(UNICODE)
// Unicode build: T text is wide, so T<->W is the identity and T<->A converts.
#define TtoA WtoA
#define AtoT AtoW
#define WtoT(a) (a)
#define TtoW(a) (a)
typedef std::wstring _tstring;
#else
// Non-Unicode build: T text is narrow, so T<->A is the identity and T<->W converts.
#define TtoA(a) (a)
#define AtoT(a) (a)
#define WtoT WtoA
#define TtoW AtoW
typedef std::string _tstring;
#endif
// Wide string <-> multi-byte string in the CP_ACP code page.
std::string WtoA(const wchar_t* pwszSrc);
std::string WtoA(const std::wstring &strSrc);
std::wstring AtoW(const char* pszSrc);
std::wstring AtoW(const std::string &strSrc);
// Wide string <-> UTF-8 string.
std::string WtoUTF8(const wchar_t* pwszSrc);
std::string WtoUTF8(const std::wstring &strSrc);
std::wstring UTF8toW(const char* pszSrc);
std::wstring UTF8toW(const std::string &strSr);
// CP_ACP multi-byte string <-> UTF-8 string.
std::string AtoUTF8(const char* src);
std::string AtoUTF8(const std::string &src);
std::string UTF8toA(const char* src);
std::string UTF8toA(const std::string &src);
// Checks whether a null-terminated string is UTF-8. A return value of 0 only
// means that the bytes happen to follow the UTF-8 encoding rules.
// Return values:
//  0 -> the input string follows the UTF-8 encoding rules
// -1 -> an illegal UTF-8 lead byte was detected
// -2 -> an illegal UTF-8 continuation byte was detected.
int IsTextUTF8(const char* pszSrc);

@ -0,0 +1,71 @@
// EasyOCR-CPP.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#pragma warning(disable : 4996)
#include "ATW.h"
#include <iostream>
#include "EasyOCR_Detector.h"
#include "EasyOCR_Recognizer.h"
int main()
{
const std::wstring detModelPath = L"F:\\SVProjects\\EasyOCR-CPP\\EasyOCR-CPP\\DetectionModel-EN.onnx";
const std::wstring recModelPath = L"F:\\SVProjects\\EasyOCR-CPP\\EasyOCR-CPP\\RecognitionModel-EN.onnx";
const std::wstring recModelWithChnPath = L"F:\\SVProjects\\EasyOCR-CPP\\EasyOCR-CPP\\RecognitionModel_EN+CH_SIM.onnx";
const std::string imagePath = "G:\\Users\\15819\\Desktop\\vin-2-o.jpg";
//const std::string imagePath = "G:\\Users\\15819\\Pictures\\SSP_ScreenShot\\Screen[0] AT 20250506151949601 - 927136.png";
//Init Config
uns::G_OCRConfig.SetGPUUsage(uns::OCRConfig::GPUUsage::PreferGPU);
uns::G_OCRConfig.SetDetectModelPath(detModelPath);
uns::G_OCRConfig.SetRecognizeModelPath(recModelPath);
uns::G_OCRConfig.SetLanguage(uns::OCRConfig::CharsetType::EN);
//Prepare
uns::EasyOCR_Detector detector;
uns::EasyOCR_Recognizer recognizer;
//Init Models
if (!detector.Init())
{
std::cout << "Detector Init Failure!" << std::endl;
return -1;
}
if (!recognizer.Init())
{
std::cout << "Recognizer Init Failure!" << std::endl;
return -2;
}
//Load Image
cv::Mat image = cv::imread(imagePath);
if (image.empty())
{
std::cerr << "Failed to load image: " << imagePath << std::endl;
return -3;
}
cv::blur(image, image, { 3,3 });
//Begin OCR
clock_t start = clock();
auto rects = detector(image);
clock_t detect_finished = clock();
auto results = recognizer(image, rects);
clock_t recognize_finished = clock();
for (const auto& [index, info] : results)
{
const auto& [text, conf] = info;
std::cout << "Box " << index << ": \"" << WtoA(text) << "\" Confidence=" << conf << "\n";
}
//Output Time Cost
double detect_time_cost = static_cast<double>(detect_finished - start) / static_cast<double>(CLOCKS_PER_SEC);
double recognize_time_cost = static_cast<double>(recognize_finished - detect_finished) / static_cast<double>(CLOCKS_PER_SEC);
printf("Detect Cost: %.4lf Second(s)\nRecognize Cost: %.4lf Second(s)\n", detect_time_cost, recognize_time_cost);
//Cleanup
detector.UnInit();
recognizer.UnInit();
//Exit
return 0;
}

@ -0,0 +1,157 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{166232a4-c250-4aae-9d41-f22ed3fb9585}</ProjectGuid>
<RootNamespace>EasyOCRCPP</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<IncludePath>F:\vcpkg\installed\x64-windows\include\opencv4;$(IncludePath)</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<IncludePath>F:\vcpkg\installed\x64-windows\include\opencv4;$(IncludePath)</IncludePath>
<CopyLocalProjectReference>true</CopyLocalProjectReference>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<!-- The sources use C++17 features (structured bindings, std::clamp); same setting as Release|x64. -->
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<!-- The sources use C++17 features (structured bindings, std::clamp); same setting as Release|x64. -->
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<!-- The sources use C++17 features (structured bindings, std::clamp); same setting as Release|x64. -->
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="ATW.cpp" />
<ClCompile Include="EasyOCR-CPP.cpp" />
<ClCompile Include="EasyOCR_Detector.cpp" />
<ClCompile Include="EasyOCR_Recognizer.cpp" />
<ClCompile Include="OCRCharset.cpp" />
<ClCompile Include="OCRConfig.cpp" />
<ClCompile Include="OCRToolBox.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="ATW.h" />
<ClInclude Include="EasyOCR_Detector.h" />
<ClInclude Include="EasyOCR_Recognizer.h" />
<ClInclude Include="OCRCharset.h" />
<ClInclude Include="OCRConfig.h" />
<ClInclude Include="OCRToolBox.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="源文件">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="头文件">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="资源文件">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="EasyOCR-CPP.cpp">
<Filter>源文件</Filter>
</ClCompile>
<ClCompile Include="ATW.cpp">
<Filter>源文件</Filter>
</ClCompile>
<ClCompile Include="OCRCharset.cpp">
<Filter>源文件</Filter>
</ClCompile>
<ClCompile Include="EasyOCR_Detector.cpp">
<Filter>源文件</Filter>
</ClCompile>
<ClCompile Include="OCRConfig.cpp">
<Filter>源文件</Filter>
</ClCompile>
<ClCompile Include="OCRToolBox.cpp">
<Filter>源文件</Filter>
</ClCompile>
<ClCompile Include="EasyOCR_Recognizer.cpp">
<Filter>源文件</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ATW.h">
<Filter>头文件</Filter>
</ClInclude>
<ClInclude Include="OCRCharset.h">
<Filter>头文件</Filter>
</ClInclude>
<ClInclude Include="EasyOCR_Detector.h">
<Filter>头文件</Filter>
</ClInclude>
<ClInclude Include="OCRConfig.h">
<Filter>头文件</Filter>
</ClInclude>
<ClInclude Include="OCRToolBox.h">
<Filter>头文件</Filter>
</ClInclude>
<ClInclude Include="EasyOCR_Recognizer.h">
<Filter>头文件</Filter>
</ClInclude>
</ItemGroup>
</Project>

@ -0,0 +1,262 @@
#include "EasyOCR_Detector.h"
#include "OCRConfig.h"
// Returns a CV_32FC3 copy of `in` with a fixed per-channel offset subtracted and
// a fixed per-channel divisor applied (both constants are scaled by 255).
// NOTE(review): the constants are applied in channel order 0,1,2 of `in`;
// confirm the channel order the model expects against the caller.
cv::Mat uns::EasyOCR_Detector::NormalizeMeanVariance(const cv::Mat& in)
{
    const cv::Scalar channelOffset(0.485f * 255, 0.456f * 255, 0.406f * 255);
    const cv::Scalar channelDivisor(0.229f * 255, 0.224f * 255, 0.225f * 255);

    cv::Mat normalized;
    in.convertTo(normalized, CV_32FC3);
    normalized -= channelOffset;
    normalized /= channelDivisor;
    return normalized;
}
// Scales every point of every polygon in place:
// x by ratioW * ratioNet and y by ratioH * ratioNet.
void uns::EasyOCR_Detector::AdjustResultCoordinates(EOCRD_Rects& polys, float ratioW, float ratioH, float ratioNet)
{
    // The factors do not depend on the point, so compute them once.
    const float scaleX = ratioW * ratioNet;
    const float scaleY = ratioH * ratioNet;
    for (auto& polygon : polys)
    {
        for (auto& vertex : polygon)
        {
            vertex.x *= scaleX;
            vertex.y *= scaleY;
        }
    }
}
// Resizes `src` with its aspect ratio preserved and pads it onto a zero canvas
// whose sides are multiples of 32.
//   squareSize  - upper bound for the longer side after scaling
//   magRatio    - magnification applied to the longer side before capping
//   ratio       - [out] scale factor that was applied to src
//   heatmapSize - [out] half of the padded canvas size (width/2, height/2)
//   dst         - [out] the padded canvas, same type as src
void uns::EasyOCR_Detector::ResizeAspectRatio(const cv::Mat& src, cv::Mat& dst, float squareSize, float magRatio, float& ratio, cv::Size& heatmapSize)
{
    const int srcH = src.rows;
    const int srcW = src.cols;
    const int longSide = std::max(srcH, srcW);

    // Magnify the longer side, but never beyond squareSize.
    float targetLong = magRatio * longSide;
    if (targetLong > squareSize)
        targetLong = squareSize;
    ratio = targetLong / longSide;

    const int scaledH = int(srcH * ratio);
    const int scaledW = int(srcW * ratio);
    cv::resize(src, dst, cv::Size(scaledW, scaledH));

    // Round both sides up to the next multiple of 32 and paste the resized
    // image into the top-left corner of a zeroed canvas of that size.
    const int paddedH = (scaledH + 31) / 32 * 32;
    const int paddedW = (scaledW + 31) / 32 * 32;
    cv::Mat canvas = cv::Mat::zeros(paddedH, paddedW, src.type());
    dst.copyTo(canvas(cv::Rect(0, 0, scaledW, scaledH)));
    dst = canvas;

    heatmapSize = cv::Size(paddedW / 2, paddedH / 2);
}
// Extracts text boxes from a text score map and a link score map.
//
//   textmap / linkmap - score maps of equal size (read with float semantics by the caller)
//   textThresh        - a component is kept only if its maximum text score reaches this value
//   linkThresh        - binarization threshold for linkmap
//   lowText           - binarization threshold for textmap
//   boxes             - [out] one 4-point box per kept component is appended, first point
//                       being the corner with the smallest x + y
//   labels            - [out] connected-component label image
//   mapper            - [out] label id of every box appended to `boxes`
//   estimateNumChars  - accepted for interface compatibility; not used by this implementation
//
// Returns true when `boxes` is non-empty afterwards, false otherwise (including
// when connected-component labelling throws a cv::Exception).
bool uns::EasyOCR_Detector::GetDetBoxesCore(const cv::Mat& textmap, const cv::Mat& linkmap, float textThresh, float linkThresh, float lowText, EOCRD_Rects& boxes, cv::Mat& labels, std::vector<int>& mapper, bool estimateNumChars)
{
    (void)estimateNumChars;
    const int H = textmap.rows, W = textmap.cols;

    // 1. Binarize both maps and merge them (values clipped to 1).
    cv::Mat textScore, linkScore;
    cv::threshold(textmap, textScore, lowText, 1, cv::THRESH_BINARY);
    cv::threshold(linkmap, linkScore, linkThresh, 1, cv::THRESH_BINARY);
    cv::Mat combined;
    cv::add(textScore, linkScore, combined);
    combined = cv::min(combined, 1);

    // Pixels that belong to a link but not to text; identical for every
    // component, so it is computed once instead of once per label.
    const cv::Mat linkOnly = (linkScore == 1) & (textScore == 0);

    // 2. Connected components (4-connectivity) on the merged mask.
    int nLabels = 0;
    cv::Mat stats, centroids;
    cv::Mat combined8u;
    combined.convertTo(combined8u, CV_8U);
    try
    {
        nLabels = cv::connectedComponentsWithStats(combined8u, labels, stats, centroids, 4);
    }
    catch (const cv::Exception&) // caught by reference: no copy, no slicing
    {
        return false;
    }

    // 3. Visit every label except the background (label 0).
    for (int k = 1; k < nLabels; ++k)
    {
        // Skip tiny components.
        const int area = stats.at<int>(k, cv::CC_STAT_AREA);
        if (area < 10)
            continue;

        // Skip components whose best text score is below textThresh.
        const cv::Mat mask = (labels == k);
        double maxVal = 0.0;
        cv::minMaxLoc(textmap, nullptr, &maxVal, nullptr, nullptr, mask);
        if (maxVal < textThresh)
            continue;

        // Build the segmentation map of this component and drop its link-only pixels.
        cv::Mat segmap = cv::Mat::zeros(H, W, CV_8UC1);
        segmap.setTo(255, mask);
        segmap.setTo(0, linkOnly);

        // Dilate inside a window around the component; the window is the
        // component's bounding box grown by `niter` and clipped to the map.
        const int x = stats.at<int>(k, cv::CC_STAT_LEFT);
        const int y = stats.at<int>(k, cv::CC_STAT_TOP);
        const int wbox = stats.at<int>(k, cv::CC_STAT_WIDTH);
        const int hbox = stats.at<int>(k, cv::CC_STAT_HEIGHT);
        const int niter = int(std::sqrt(area * std::min(wbox, hbox) / float(wbox * hbox)) * 2);
        const int sx = std::max(0, x - niter), sy = std::max(0, y - niter);
        const int ex = std::min(W, x + wbox + niter + 1), ey = std::min(H, y + hbox + niter + 1);
        const cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(1 + niter, 1 + niter));
        cv::Mat window = segmap(cv::Rect(sx, sy, ex - sx, ey - sy));
        cv::dilate(window, window, kernel);

        // Fit the minimum-area rectangle around the remaining pixels.
        std::vector<cv::Point> pts;
        cv::findNonZero(segmap, pts);
        if (pts.empty())
            continue; // nothing left after removing link pixels: no box to fit
        const cv::RotatedRect rect = cv::minAreaRect(pts);
        cv::Point2f boxPts[4];
        rect.points(boxPts);
        std::vector<cv::Point2f> box(boxPts, boxPts + 4);

        // If the fitted rectangle is nearly square (side ratio within 0.1 of 1),
        // use the axis-aligned bounding box of the pixels instead.
        const float wlen = (float)cv::norm(box[0] - box[1]);
        const float hlen = (float)cv::norm(box[1] - box[2]);
        const float ratio = std::max(wlen, hlen) / (std::min(wlen, hlen) + 1e-5f);
        if (std::abs(1 - ratio) <= 0.1f)
        {
            int minx = W, maxx = 0, miny = H, maxy = 0;
            for (const auto& p : pts)
            {
                minx = std::min(minx, p.x); maxx = std::max(maxx, p.x);
                miny = std::min(miny, p.y); maxy = std::max(maxy, p.y);
            }
            box =
            {
                { float(minx),float(miny) },
                { float(maxx),float(miny) },
                { float(maxx),float(maxy) },
                { float(minx),float(maxy) }
            };
        }

        // Rotate the corner list so that it starts at the corner with the smallest x + y.
        int start = 0;
        float minSum = box[0].x + box[0].y;
        for (int i = 1; i < 4; i++)
        {
            const float s = box[i].x + box[i].y;
            if (s < minSum)
            {
                minSum = s;
                start = i;
            }
        }
        std::rotate(box.begin(), box.begin() + start, box.end());

        // Record the label together with its box so both lists stay aligned.
        mapper.push_back(k);
        boxes.push_back(box);
    }
    return (!boxes.empty());
}
// Builds a detector in the "not initialized" state: fetches the ONNX Runtime API
// table, reads the detection model path from the global configuration, and
// leaves the session pointer empty. No model is loaded until Init() is called.
uns::EasyOCR_Detector::EasyOCR_Detector()
{
    ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
    model_path = G_OCRConfig.GetDetectModelPath();
    ort_cpu_session = nullptr;
    ort_inited = false;
}
// Prepares the detector for inference. Returns true on success (or when already
// initialized) and false when the model file check fails, no execution provider
// can be selected, or any step throws.
bool uns::EasyOCR_Detector::Init()
{
    if (ort_inited)
        return true;
    if (!RecheckModelInfo())
        return false;
    try
    {
        ort_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "EasyOCR_Detector");
        bool fallback_to_cpu = false;
        if (!OCRToolBox::AutoSelectEP(ort, ort_session_options, fallback_to_cpu))
            return false;
        OCRToolBox::InitOrtSessionOptions(ort_session_options);
        if ((G_OCRConfig.GetGPUUsage() == OCRConfig::GPUUsage::CPUOnly) || fallback_to_cpu)
        {
            // CPU path: create the session once and keep it for all later calls;
            // the input/output names are read from this persistent session.
            ort_cpu_session = new Ort::Session(ort_env, model_path.c_str(), ort_session_options);
            OCRToolBox::GetInputOutputNames(ort_cpu_session, input_names, input_ns, output_names, output_ns);
        }
        else
        {
            // Non-CPU path: read the names through a temporary session
            // (original note: CUDA is not thread-safe); no session is kept.
            Ort::Session ort_session(ort_env, model_path.c_str(), ort_session_options);
            OCRToolBox::GetInputOutputNames(&ort_session, input_names, input_ns, output_names, output_ns);
        }
        ort_inited = true;
        return true;
    }
    catch (...)
    {
        // Do not leak a CPU session that was created before a later step threw;
        // a retry would otherwise overwrite the pointer.
        delete ort_cpu_session;
        ort_cpu_session = nullptr;
        return false;
    }
}
// Releases the persistent CPU session, if any. Returns true on success and
// false if releasing the session throws.
// NOTE(review): ort_inited is left unchanged here, so a later Init() returns
// early without recreating the session - confirm whether that is intended.
bool uns::EasyOCR_Detector::UnInit()
{
    try
    {
        // delete on a null pointer is a no-op, so no explicit check is needed.
        delete ort_cpu_session;
        ort_cpu_session = nullptr;
    }
    catch (...)
    {
        return false;
    }
    return true;
}
// Makes sure a model path is available (re-reading it from the global
// configuration when the stored one is empty) and returns the result of
// OCRToolBox::CheckFile for that path.
bool uns::EasyOCR_Detector::RecheckModelInfo()
{
    const bool pathMissing = model_path.empty();
    if (pathMissing)
    {
        model_path = G_OCRConfig.GetDetectModelPath();
    }
    const bool fileOk = OCRToolBox::CheckFile(model_path);
    return fileOk;
}
// Runs text detection on `image` and returns the detected boxes in the
// coordinate system of `image`. Returns an empty result when the model file
// check fails, the image is empty, the model output has an unexpected layout,
// no box is found, or any step throws.
uns::EOCRD_Rects uns::EasyOCR_Detector::operator()(const cv::Mat& image)
{
    // 0. Preconditions: model file available, image not empty. An empty image
    //    would otherwise lead to a 0/0 scale factor during resizing.
    if (!RecheckModelInfo())
        return {};
    if (image.empty())
        return {};
    try
    {
        // 1. Resize (long side capped at 1280, magnification 1.5) and normalize.
        cv::Mat resized;
        float ratio = 1.0f;
        cv::Size heatmapSize;
        ResizeAspectRatio(image, resized, 1280.0f, 1.5f, ratio, heatmapSize);
        cv::Mat input = NormalizeMeanVariance(resized);

        // 2. Pack into an NCHW blob and wrap it in an ONNX Runtime tensor
        //    (the tensor refers to the blob's memory; `input` must outlive it).
        cv::dnn::blobFromImage(input, input);
        std::array<int64_t, 4> shape = { 1, 3, input.size[2], input.size[3] };
        Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value tensor = Ort::Value::CreateTensor<float>(memInfo, input.ptr<float>(), input.total(), shape.data(), shape.size());

        // 3. Inference: use the persistent CPU session when there is one,
        //    otherwise run through a temporary session created for this call.
        auto outputs = (ort_cpu_session != nullptr)
            ? ort_cpu_session->Run(Ort::RunOptions{ nullptr }, input_names.data(), &tensor, 1, output_names.data(), 1)
            : Ort::Session(ort_env, model_path.c_str(), ort_session_options).Run(Ort::RunOptions{ nullptr }, input_names.data(), &tensor, 1, output_names.data(), 1);
        if (outputs.empty())
            return {};

        // Validate the output layout before indexing into it: dimensions 1 and 2
        // are used as height and width, and two interleaved floats are read per pixel.
        auto outInfo = outputs[0].GetTensorTypeAndShapeInfo();
        const std::vector<int64_t> outputShape = outInfo.GetShape();
        if (outputShape.size() < 3)
            return {};
        const int H = int(outputShape[1]), W = int(outputShape[2]);
        if (H <= 0 || W <= 0)
            return {};
        if (outInfo.GetElementCount() < static_cast<size_t>(H) * static_cast<size_t>(W) * 2)
            return {};

        // 4. De-interleave into the text score map and the link score map.
        const float* outData = outputs[0].GetTensorMutableData<float>();
        cv::Mat score_text(H, W, CV_32F);
        cv::Mat score_link(H, W, CV_32F);
        for (int y = 0; y < H; ++y)
        {
            float* textRow = score_text.ptr<float>(y);
            float* linkRow = score_link.ptr<float>(y);
            for (int x = 0; x < W; ++x)
            {
                const size_t offset = (static_cast<size_t>(y) * W + x) * 2;
                textRow[x] = outData[offset + 0];
                linkRow[x] = outData[offset + 1];
            }
        }

        // 5. Extract boxes in heatmap coordinates.
        EOCRD_Rects boxes;
        cv::Mat labels;
        std::vector<int> mapper;
        if (!GetDetBoxesCore(score_text, score_link, textThreshold, linkThreshold, lowText, boxes, labels, mapper, false))
            return {};

        // 6. Map back to the original image: undo the resize (1 / ratio) and the
        //    heatmap down-scaling. heatmapSize is half of the padded canvas in
        //    both directions, so one factor covers x and y.
        const float invRatio = 1.0f / ratio;
        const float ratioNet = float(resized.cols) / float(heatmapSize.width);
        AdjustResultCoordinates(boxes, invRatio, invRatio, ratioNet);
        return boxes;
    }
    catch (...)
    {
        return {};
    }
}

@ -0,0 +1,51 @@
#pragma once
#include <vector>
#include "OCRToolBox.h"
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
namespace uns
{
    /// <summary>
    /// Text detector: loads a detection model through ONNX Runtime and returns
    /// the boxes found in an image. Call Init() before using operator().
    /// </summary>
    class EasyOCR_Detector
    {
    private:
        // Thresholds passed to GetDetBoxesCore.
        const float lowText = 0.4f;
        const float textThreshold = 0.7f;
        const float linkThreshold = 0.4f;
    private:
        const OrtApi* ort;
        Ort::Env ort_env;
        // Owned by this object; non-null only when the CPU path was chosen in Init().
        Ort::Session* ort_cpu_session;
        Ort::SessionOptions ort_session_options;
        bool ort_inited;
        std::wstring model_path;
        IONames input_names, output_names;
        IONamesStorage input_ns, output_ns;
    private:
        cv::Mat NormalizeMeanVariance(const cv::Mat& in);
        void AdjustResultCoordinates(EOCRD_Rects& polys, float ratioW, float ratioH, float ratioNet = 2.0f);
        void ResizeAspectRatio(const cv::Mat& src, cv::Mat& dst, float squareSize, float magRatio, float& ratio, cv::Size& heatmapSize);
        bool GetDetBoxesCore(const cv::Mat& textmap, const cv::Mat& linkmap, float textThresh, float linkThresh, float lowText, EOCRD_Rects& boxes, cv::Mat& labels, std::vector<int>& mapper, bool estimateNumChars);
    public:
        EasyOCR_Detector();
        // The object owns a raw session pointer, so it must not be copied.
        EasyOCR_Detector(const EasyOCR_Detector&) = delete;
        EasyOCR_Detector& operator=(const EasyOCR_Detector&) = delete;
        // Releases the owned session if the caller did not call UnInit();
        // calling UnInit() more than once is harmless.
        ~EasyOCR_Detector() { UnInit(); }
    public:
        bool Init();
        bool UnInit();
        bool RecheckModelInfo();
        /// <summary>
        /// EasyOCR text detection.
        /// </summary>
        /// <param name="image">Image to run detection on (three-channel BGR image)</param>
        /// <returns>The detected boxes</returns>
        EOCRD_Rects operator()(const cv::Mat& image);
    };
}

@ -0,0 +1,388 @@
#include "EasyOCR_Recognizer.h"
#include "OCRCharset.h"
// Copies the three dimensions from `obj` and returns the updated value
// (by value, as declared).
uns::EasyOCR_Recognizer::NormalizePAD::Size3i uns::EasyOCR_Recognizer::NormalizePAD::Size3i::operator=(const Size3i& obj)
{
    this->d0 = obj.d0;
    this->d1 = obj.d1;
    this->d2 = obj.d2;
    return *this;
}
// Stores the target size and the padding type, and caches half of the target
// width (max_size.d2 / 2).
uns::EasyOCR_Recognizer::NormalizePAD::NormalizePAD(Size3i max_size, const std::string& PAD_type)
{
    this->max_width_half = max_size.d2 / 2;
    this->PAD_type = PAD_type;
    this->max_size = max_size;
}
// Converts the image to 32-bit float, maps pixel values v to (v / 255 - 0.5) / 0.5,
// and places it at the top-left of a zeroed max_size.d1 x max_size.d2 image.
// When the widths differ, the columns to the right of the image are filled with
// copies of its last column.
cv::Mat uns::EasyOCR_Recognizer::NormalizePAD::operator()(const cv::Mat& input_img) const
{
    cv::Mat scaled;
    input_img.convertTo(scaled, CV_32F, 1.0 / 255);
    scaled = (scaled - 0.5f) / 0.5f;

    const int rows = scaled.rows;
    const int cols = scaled.cols;
    const int channels = scaled.channels();

    // Zero canvas with the same channel count as the input.
    cv::Mat padded = cv::Mat::zeros(max_size.d1, max_size.d2, CV_32FC(channels));
    scaled.copyTo(padded(cv::Rect(0, 0, cols, rows)));

    // Fill the remaining width by repeating the right-most column.
    const int extraCols = max_size.d2 - cols;
    if (extraCols != 0)
    {
        cv::Mat filler;
        cv::repeat(scaled.col(cols - 1), 1, extraCols, filler);
        filler.copyTo(padded(cv::Rect(cols, 0, extraCols, rows)));
    }
    return padded;
}
// Returns a copy of `img_in`; when the contrast measured by ContrastGrey is
// below `target`, the copy is stretched with
// (pixel - low + 25) * 200 / max(10, high - low), limited to [0, 255] and
// converted back to 8 bits.
cv::Mat uns::EasyOCR_Recognizer::AlignCollate::AdjustContrastGrey(const cv::Mat& img_in, double target) const
{
    double contrast = 0.0;
    int high = 0, low = 0;
    ContrastGrey(img_in, contrast, high, low);

    cv::Mat result = img_in.clone();
    if (!(contrast < target))
        return result; // contrast is already sufficient

    // Work in 32-bit integers so intermediate values may leave [0, 255].
    cv::Mat stretched;
    result.convertTo(stretched, CV_32S);
    const double gain = 200.0 / std::max(10, high - low);
    stretched = (stretched - low + 25) * gain;

    // Limit every pixel to [0, 255] before converting back to 8 bits.
    stretched.forEach<int>([](int& value, const int*)
        {
            value = std::clamp(value, 0, 255);
        });
    stretched.convertTo(result, CV_8U);
    return result;
}
// Measures the contrast of an image whose rows are read as 8-bit values.
//   high     - [out] value at index floor(0.9 * (n - 1)) of the sorted pixels
//   low      - [out] value at index floor(0.1 * (n - 1)) of the sorted pixels
//   contrast - [out] (high - low) / max(10, high + low)
// For an image without pixels all three outputs are set to 0.
void uns::EasyOCR_Recognizer::AlignCollate::ContrastGrey(const cv::Mat& img, double& contrast, int& high, int& low) const
{
    // Collect every pixel into one contiguous vector so it can be sorted.
    std::vector<int> pixels;
    if (img.rows > 0 && img.cols > 0)
        pixels.reserve(static_cast<size_t>(img.rows) * static_cast<size_t>(img.cols));
    for (int i = 0; i < img.rows; ++i)
    {
        const uchar* row_ptr = img.ptr<uchar>(i);
        pixels.insert(pixels.end(), row_ptr, row_ptr + img.cols);
    }

    // Without pixels there is nothing to rank; "size() - 1" below would
    // otherwise wrap around and index far outside the vector.
    if (pixels.empty())
    {
        contrast = 0.0;
        high = 0;
        low = 0;
        return;
    }

    // Sort, then read the values at the 90% and 10% positions.
    std::sort(pixels.begin(), pixels.end());
    const size_t last = pixels.size() - 1;
    const size_t idx90 = static_cast<size_t>(0.9 * last);
    const size_t idx10 = static_cast<size_t>(0.1 * last);
    high = pixels[idx90];
    low = pixels[idx10];

    // The denominator is kept at 10 or more so dark images do not blow up the ratio.
    contrast = double(high - low) / double(std::max(10, high + low));
}
// Stores the target height/width, the keep-ratio flag and the contrast target
// used by operator().
uns::EasyOCR_Recognizer::AlignCollate::AlignCollate(int imgH, int imgW, bool keep_ratio_with_pad, double adjust_contrast)
{
    this->keep_ratio_with_pad = keep_ratio_with_pad;
    this->adjust_contrast = adjust_contrast;
    this->imgW = imgW;
    this->imgH = imgH;
}
// Turns a batch of images into one blob: each image is optionally
// contrast-adjusted (when adjust_contrast > 0), resized to height imgH with its
// aspect ratio kept (width capped at imgW), normalized and padded by
// NormalizePAD, and finally all results are packed with cv::dnn::blobFromImages.
cv::Mat uns::EasyOCR_Recognizer::AlignCollate::operator()(const std::vector<cv::Mat>& batch) const
{
    // One normalizer/padder for the whole batch: 1 x imgH x imgW target.
    NormalizePAD transform({ 1, imgH, imgW });

    std::vector<cv::Mat> prepared;
    for (const cv::Mat& sample : batch)
    {
        // Optional contrast adjustment works on a grey version of the sample.
        cv::Mat current;
        if (adjust_contrast > 0)
        {
            cv::Mat grey;
            if (sample.channels() > 1)
                cv::cvtColor(sample, grey, cv::COLOR_BGR2GRAY);
            else
                grey = sample;
            current = AdjustContrastGrey(grey, adjust_contrast);
        }
        else
        {
            current = sample;
        }

        // Width that keeps the aspect ratio at height imgH, capped at imgW.
        const double aspect = double(current.cols) / current.rows;
        int fittedW = static_cast<int>(std::ceil(imgH * aspect));
        if (fittedW > imgW)
            fittedW = imgW;

        cv::Mat scaled;
        cv::resize(current, scaled, cv::Size(fittedW, imgH), 0, 0, cv::INTER_CUBIC);
        prepared.push_back(transform(scaled));
    }

    cv::Mat blob;
    cv::dnn::blobFromImages(prepared, blob);
    return blob;
}
// Returns (product of the non-zero elements of x) ^ (2 / sqrt(N)), where N is
// the total number of elements (zeros included). Returns 0 for an empty input.
float uns::EasyOCR_Recognizer::CustomMean(const VecFloat& x)
{
    const size_t count = x.size();
    if (count == 0)
        return 0.0f;

    // Exponent depends only on the element count.
    const double exponent = 2.0 / std::sqrt(static_cast<double>(count));

    // Multiply in double precision; zero entries are skipped.
    double product = 1.0;
    for (const float value : x)
    {
        if (value != 0)
            product *= static_cast<double>(value);
    }

    return static_cast<float>(std::pow(product, exponent));
}
// Converts a cropped image into the blob fed to the recognizer: grey conversion,
// resize to a side of 64 pixels, then AlignCollate (contrast target 0.5).
// Returns an empty Mat for an empty input; no exception is thrown for that case
// so that later stages can simply stop on the empty result.
cv::Mat uns::EasyOCR_Recognizer::Preprocess(const cv::Mat& img) const
{
    if (img.empty())
        return {};

    // Grey conversion: 3 channels are treated as BGR, 4 channels as BGRA,
    // anything else is used as it is.
    cv::Mat gray;
    switch (img.channels())
    {
    case 3:
        cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
        break;
    case 4:
        cv::cvtColor(img, gray, cv::COLOR_BGRA2GRAY);
        break;
    default:
        gray = img;
        break;
    }

    const int model_height = 64;
    const int width = gray.cols;
    const int height = gray.rows;
    const float aspect = static_cast<float>(width) / static_cast<float>(height);

    // Taller-than-wide input: use the inverted ratio from CalculateRatio and
    // resize to 64 wide; otherwise resize to 64 high.
    const bool tall = aspect < 1.0f;
    const float usedRatio = tall ? CalculateRatio(width, height) : aspect;
    const int model_width = static_cast<int>(model_height * usedRatio);
    const cv::Size target = tall ? cv::Size(model_height, model_width)
                                 : cv::Size(model_width, model_height);

    cv::Mat resized;
    cv::resize(gray, resized, target, 0, 0, cv::INTER_LINEAR);

    AlignCollate alignCollate(model_height, model_width, true, 0.5);
    return alignCollate({ resized });
}
// Aspect ratio of a width x height box, folded so that a ratio below 1 is
// replaced by its reciprocal.
// No zero check is performed on either dimension.
float uns::EasyOCR_Recognizer::CalculateRatio(int width, int height) const
{
    const float aspect = static_cast<float>(width) / static_cast<float>(height);
    return (aspect < 1.0f) ? (1.0f / aspect) : aspect;
}
// Numerically stable softmax over C consecutive floats starting at logits.
// Returns a vector of C probabilities, or an empty vector when logits is
// null or C is not positive (previously logits[0] was read unconditionally).
uns::VecFloat uns::EasyOCR_Recognizer::SoftMAX(const float* logits, int C) const
{
    if (logits == nullptr || C <= 0)
        return {};

    // Subtract the maximum before exponentiating to avoid overflow.
    float peak = logits[0];
    for (int i = 1; i < C; ++i)
        peak = std::max(peak, logits[i]);

    std::vector<float> exps(C);
    float sum = 0.f;
    for (int i = 0; i < C; ++i)
    {
        exps[i] = std::exp(logits[i] - peak);
        sum += exps[i];
    }

    // Clamp the divisor so a vanishing sum cannot cause a division by ~0.
    const float denom = (sum > 1e-6f ? sum : 1e-6f);
    for (int i = 0; i < C; ++i)
        exps[i] /= denom;
    return exps;
}
// Greedy decoding of the recognizer output.
//   outputs      tensor read as N * T * C contiguous floats (sample, time step, class)
//   out_indices  receives the winning class index of every (sample, step), in order
//   out_probs    receives the matching probability (see the note on index 0 below)
//   ignore_idx   class indices whose probability is forced to 0 before the
//                per-step re-normalization; values outside [0, C) are skipped
// Both output vectors are cleared first. Nothing is appended when N, T or C is
// not positive.
void uns::EasyOCR_Recognizer::PostprocessONNXOutput(const Ort::Value& outputs, int N, int T, int C, VecInt& out_indices, VecFloat& out_probs, const VecInt ignore_idx)
{
    const float* data = outputs.GetTensorData<float>();
    out_indices.clear();
    out_probs.clear();
    if (N <= 0 || T <= 0 || C <= 0)
        return;

    const size_t steps = static_cast<size_t>(N) * static_cast<size_t>(T);
    out_indices.reserve(steps);
    out_probs.reserve(steps);

    for (int n = 0; n < N; ++n)
    {
        for (int t = 0; t < T; ++t)
        {
            // Logits of this step start at ((n * T) + t) * C.
            const float* logits = data + (static_cast<size_t>(n) * T + t) * C;

            // 1) Softmax over the C classes.
            VecFloat probs = SoftMAX(logits, C);

            // 2) Suppress ignored classes. The index is validated because an
            //    out-of-range value would otherwise write outside the buffer.
            for (const int idx : ignore_idx)
            {
                if (idx >= 0 && idx < C)
                    probs[idx] = 0.f;
            }

            // 3) Re-normalize, unless everything was suppressed.
            float sum = 0.f;
            for (int c = 0; c < C; ++c)
                sum += probs[c];
            if (sum > 1e-6f)
            {
                for (int c = 0; c < C; ++c)
                    probs[c] /= sum;
            }

            // 4) Arg-max. NOTE: when index 0 wins, the reported probability is
            //    deliberately left at 0 rather than probs[0]; CustomMean skips
            //    zero entries when forming its product. Index 0 is presumably
            //    the CTC blank class - confirm against the charset.
            int best = 0;
            float best_prob = 0.0f;
            for (int c = 1; c < C; ++c)
            {
                if (probs[c] > probs[best])
                {
                    best = c;
                    best_prob = probs[c];
                }
            }
            out_indices.push_back(best);
            out_probs.push_back(best_prob);
        }
    }
}
// Builds an uninitialized recognizer: fetches the ONNX Runtime API table,
// leaves the CPU session unset and takes the model path from the global
// configuration. Init() must be called before recognition.
uns::EasyOCR_Recognizer::EasyOCR_Recognizer()
    : ort(OrtGetApiBase()->GetApi(ORT_API_VERSION)),
      ort_cpu_session(nullptr),
      ort_inited(false),
      model_path(G_OCRConfig.GetRecognizeModelPath())
{
}
// Prepares ONNX Runtime for recognition. Returns true on success or when the
// recognizer is already initialized, false if the model file check fails, the
// execution provider cannot be selected, or ONNX Runtime throws.
//
// On the CPU path (CPUOnly or a GPU fallback) a persistent session is created
// and kept in ort_cpu_session. Otherwise a temporary session is used only to
// read the input/output tensor names.
//
// A failed attempt leaves no half-built state behind: the session is released
// and the next call starts from fresh session options, so retries neither
// leak a session nor append the CUDA provider a second time.
bool uns::EasyOCR_Recognizer::Init()
{
    if (ort_inited)
        return true;
    if (!RecheckModelInfo())
        return false;
    try
    {
        // Discard leftovers of an earlier failed attempt. The session goes
        // first because the environment it was created with is replaced below.
        delete ort_cpu_session;
        ort_cpu_session = nullptr;
        ort_session_options = Ort::SessionOptions();

        ort_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "EasyOCR_Recognizer");
        bool fallback_to_cpu = false;
        if (!OCRToolBox::AutoSelectEP(ort, ort_session_options, fallback_to_cpu))
            return false;
        OCRToolBox::InitOrtSessionOptions(ort_session_options);
        if ((G_OCRConfig.GetGPUUsage() == OCRConfig::GPUUsage::CPUOnly) || fallback_to_cpu)
        {
            // CPU execution: keep one session and read the tensor names from it.
            ort_cpu_session = new Ort::Session(ort_env, model_path.c_str(), ort_session_options);
            OCRToolBox::GetInputOutputNames(ort_cpu_session, input_names, input_ns, output_names, output_ns);
        }
        else
        {
            // GPU execution: read the names through a throw-away session
            // (the original author notes CUDA is not thread-safe).
            Ort::Session ort_session(ort_env, model_path.c_str(), ort_session_options);
            OCRToolBox::GetInputOutputNames(&ort_session, input_names, input_ns, output_names, output_ns);
        }
        ort_inited = true;
        return true;
    }
    catch (...)
    {
        // Release a session that was created before the failure so that a
        // retry does not overwrite (and leak) it.
        delete ort_cpu_session;
        ort_cpu_session = nullptr;
        return false;
    }
}
// Releases the persistent CPU session, if any. Returns false only when the
// release throws.
// NOTE(review): ort_inited is not reset here, so a later Init() call returns
// early without recreating the session - confirm this is intended.
bool uns::EasyOCR_Recognizer::UnInit()
{
    try
    {
        // Deleting a null pointer is a no-op, so no explicit check is needed.
        delete ort_cpu_session;
        ort_cpu_session = nullptr;
    }
    catch (...)
    {
        return false;
    }
    return true;
}
// Re-reads the model path from the global configuration when none is cached,
// then reports whether that path passes OCRToolBox::CheckFile.
bool uns::EasyOCR_Recognizer::RecheckModelInfo()
{
    const bool path_missing = model_path.empty();
    if (path_missing)
        model_path = G_OCRConfig.GetRecognizeModelPath();
    return OCRToolBox::CheckFile(model_path);
}
// Recognizes the text in a single crop.
// Returns { text, confidence }. Failures are reported through the confidence:
//   -1  the model file check failed, or Init() has not populated the tensor names
//    0  preprocessing produced no input (empty image)
//   -2  inference failed (exception, or an output that is not rank 3)
uns::EOCR_Result uns::EasyOCR_Recognizer::operator()(const cv::Mat& image)
{
    try
    {
        if (!RecheckModelInfo())
            return { L"", -1.0f };
        cv::Mat input = Preprocess(image);
        if (input.empty())
            return { L"", 0.0f };
        // Without a successful Init() the name arrays are empty and Run()
        // would be handed null pointers; report "not ready" instead.
        if (input_names.empty() || output_names.empty())
            return { L"", -1.0f };

        // Wrap the blob's buffer (no copy) as a 1 x 1 x H x W float tensor.
        std::array<int64_t, 4> inputShape = { 1, 1, input.size[2], input.size[3] };
        Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memInfo, input.ptr<float>(), input.total(), inputShape.data(), inputShape.size());

        std::vector<Ort::Value> outputs;
        if (ort_cpu_session != nullptr)
        {
            outputs = ort_cpu_session->Run(Ort::RunOptions{ nullptr }, input_names.data(), &inputTensor, 1, output_names.data(), 1);
        }
        else
        {
            // No persistent session: create one just for this call.
            Ort::Session session(ort_env, model_path.c_str(), ort_session_options);
            outputs = session.Run(Ort::RunOptions{ nullptr }, input_names.data(), &inputTensor, 1, output_names.data(), 1);
        }
        if (outputs.empty())
            return { L"", -2.0f };

        // Expected output shape: [N, T, C].
        const std::vector<int64_t> shape = outputs.front().GetTensorTypeAndShapeInfo().GetShape();
        if (shape.size() != 3)
            return { L"", -2.0f };
        const int N = static_cast<int>(shape[0]);
        const int T = static_cast<int>(shape[1]);
        const int C = static_cast<int>(shape[2]);

        // Softmax + greedy pick per time step (the callee clears both vectors).
        VecInt indices;
        VecFloat maxProbs;
        PostprocessONNXOutput(outputs.front(), N, T, C, indices, maxProbs);

        // Decode the indices to text, then derive the confidence.
        std::wstring text = OCRCharset::GetString(indices);
        const float conf = CustomMean(maxProbs);
        return { text, conf };
    }
    catch (...)
    {
        return { L"", -2.0f };
    }
}
// Recognizes every region of full_image listed in rects.
// Each polygon is reduced to its bounding rectangle, clipped to the image and
// recognized on its own; the result is stored under the polygon's position in
// rects. Regions whose clipped crop is empty are skipped.
// Returns an empty set when the model check fails or anything throws.
uns::EOCR_ResultSet uns::EasyOCR_Recognizer::operator()(const cv::Mat& full_image, const EOCRD_Rects& rects)
{
    if (!RecheckModelInfo())
        return {};
    try
    {
        const cv::Rect image_bounds(0, 0, full_image.cols, full_image.rows);
        EOCR_ResultSet recognized;
        size_t position = 0;
        for (const auto& polygon : rects)
        {
            const size_t key = position++;
            // Bounding rectangle of the polygon, limited to the image area.
            const cv::Rect roi = cv::boundingRect(polygon) & image_bounds;
            const cv::Mat patch = full_image(roi);
            if (patch.empty())
                continue;
            recognized.emplace(key, (*this)(patch));
        }
        return recognized;
    }
    catch (...)
    {
        return {};
    }
}

@ -0,0 +1,96 @@
#pragma once
#include <string>
#include <vector>
#include "OCRConfig.h"
#include "OCRToolBox.h"
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
namespace uns
{
    /// Text recognizer running an EasyOCR recognition model through ONNX Runtime.
    /// NOTE(review): no destructor is declared, so the session created by
    /// Init() is released only when UnInit() is called.
    class EasyOCR_Recognizer
    {
    public:
        /// Callable that maps an input image to a normalized, padded image.
        class NormalizePAD
        {
        public:
            /// Three-component size: channels, height, width.
            struct Size3i
            {
                int d0, d1, d2;
                // NOTE(review): returns by value rather than Size3i&.
                Size3i operator=(const Size3i& obj);
            };

        private:
            Size3i max_size;      // maximum size
            int max_width_half;   // half of the width, kept for optional centre-crop style features
            std::string PAD_type; // padding type, "right" or "left"

        public:
            NormalizePAD(Size3i max_size, const std::string& PAD_type = "right");
            cv::Mat operator()(const cv::Mat& input_img) const;
        };

        /// Callable that turns a batch of images into one blob.
        class AlignCollate
        {
        private:
            int imgH;                 // target image height
            int imgW;                 // target image width
            bool keep_ratio_with_pad; // keep-aspect-ratio-and-pad flag (not used yet)
            double adjust_contrast;   // contrast adjustment target

            cv::Mat AdjustContrastGrey(const cv::Mat& img_in, double target = 0.4) const;
            void ContrastGrey(const cv::Mat& img, double& contrast, int& high, int& low) const;

        public:
            AlignCollate(int imgH = 32, int imgW = 100, bool keep_ratio_with_pad = false, double adjust_contrast = 0.0);
            cv::Mat operator()(const std::vector<cv::Mat>& batch) const;
        };

    private:
        const OrtApi* ort;                       // ONNX Runtime C API table
        Ort::Env ort_env;
        Ort::Session* ort_cpu_session;           // owned; created by Init(), released by UnInit()
        Ort::SessionOptions ort_session_options;
        bool ort_inited;
        std::wstring model_path;
        IONames input_names, output_names;       // C-string views into input_ns / output_ns
        IONamesStorage input_ns, output_ns;      // owning storage for the tensor names

        float CustomMean(const VecFloat& x);
        cv::Mat Preprocess(const cv::Mat& img) const;
        float CalculateRatio(int width, int height) const;
        VecFloat SoftMAX(const float* logits, int C) const;
        void PostprocessONNXOutput(const Ort::Value& outputs, int N, int T, int C, VecInt& out_indices, VecFloat& out_probs, const VecInt ignore_idx = {});

    public:
        EasyOCR_Recognizer();

        bool Init();
        bool UnInit();
        bool RecheckModelInfo();
        /// <summary>
        /// EasyOCR text recognition.
        /// </summary>
        /// <param name="image">Image containing only text (three-channel BGR image)</param>
        /// <returns>The text and its confidence</returns>
        EOCR_Result operator()(const cv::Mat& image);
        /// <summary>
        /// EasyOCR text recognition over detected regions.
        /// </summary>
        /// <param name="full_image">The complete image to examine (three-channel BGR image)</param>
        /// <param name="rects">Text regions found by EasyOCR_Detector</param>
        /// <returns>A set of [text and confidence] entries</returns>
        EOCR_ResultSet operator()(const cv::Mat& full_image, const EOCRD_Rects& rects);
    };
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,22 @@
#pragma once
#include <map>
#include <string>
#include <vector>
namespace uns
{
    /// Static-only helper mapping class indices to characters.
    /// It can be neither instantiated nor copied.
    class OCRCharset
    {
        // Index-to-character tables; presumably English-only and
        // Chinese + English respectively (definitions are not shown here).
        static std::map<int, std::wstring> en_charmap;
        static std::map<int, std::wstring> ch_en_charmap;

    public:
        OCRCharset() = delete;
        OCRCharset(const OCRCharset&) = delete;

        /// Looks up a single index.
        static std::wstring GetChar(int index);
        /// Builds a string from a sequence of indices.
        static std::wstring GetString(const std::vector<int>& indexs);
    };
}

@ -0,0 +1,57 @@
#include "OCRConfig.h"
// Definition of the global OCR configuration object (default-constructed).
uns::OCRConfig uns::G_OCRConfig;
// Default configuration: prefer the GPU, English charset, empty model paths.
uns::OCRConfig::OCRConfig()
    : gpu_usage(GPUUsage::PreferGPU),
      language(CharsetType::EN)
{
}
// Builds a configuration from explicit model paths, charset and GPU policy.
uns::OCRConfig::OCRConfig(const std::wstring& detect_model, const std::wstring& reco_model, CharsetType language, GPUUsage gpu)
    : gpu_usage(gpu),
      language(language),
      detect_model_path(detect_model),
      recognize_model_path(reco_model)
{
}
// Returns the configured GPU usage policy.
uns::OCRConfig::GPUUsage uns::OCRConfig::GetGPUUsage() const
{
    return this->gpu_usage;
}
// Returns the configured charset.
uns::OCRConfig::CharsetType uns::OCRConfig::GetLanguage() const
{
    return this->language;
}
std::wstring uns::OCRConfig::GetDetectModelPath() const
{
return detect_model_path;
}
std::wstring uns::OCRConfig::GetRecognizeModelPath() const
{
return recognize_model_path;
}
// Replaces the GPU usage policy.
void uns::OCRConfig::SetGPUUsage(GPUUsage usage)
{
    this->gpu_usage = usage;
}
// Replaces the charset.
void uns::OCRConfig::SetLanguage(CharsetType type)
{
    this->language = type;
}
void uns::OCRConfig::SetDetectModelPath(const std::wstring& path)
{
detect_model_path = path;
}
void uns::OCRConfig::SetRecognizeModelPath(const std::wstring& path)
{
recognize_model_path = path;
}

@ -0,0 +1,47 @@
#pragma once
#include <string>
namespace uns
{
    /// Settings shared by the OCR components: GPU policy, charset and the
    /// paths of the detection and recognition models. Not copyable.
    class OCRConfig
    {
    public:
        enum class GPUUsage
        {
            ForceGPU,  // require the GPU; report an error on failure
            PreferGPU, // try the GPU first; fall back on failure
            CPUOnly    // use the CPU only
        };
        enum class CharsetType
        {
            EN,   // English
            EN_CH // English and Chinese
        };

    private:
        GPUUsage gpu_usage;
        CharsetType language;
        std::wstring detect_model_path;
        std::wstring recognize_model_path;

    public:
        OCRConfig();
        OCRConfig(const OCRConfig& obj) = delete;
        OCRConfig(const std::wstring& detect_model, const std::wstring& reco_model, CharsetType language, GPUUsage gpu = GPUUsage::PreferGPU);

        // Accessors
        GPUUsage GetGPUUsage() const;
        CharsetType GetLanguage() const;
        std::wstring GetDetectModelPath() const;
        std::wstring GetRecognizeModelPath() const;

        // Mutators
        void SetGPUUsage(GPUUsage usage);
        void SetLanguage(CharsetType type);
        void SetDetectModelPath(const std::wstring& path);
        void SetRecognizeModelPath(const std::wstring& path);
    };

    /// Global configuration instance.
    extern OCRConfig G_OCRConfig;
}

@ -0,0 +1,85 @@
#include "OCRToolBox.h"
#include "OCRConfig.h"
// Reports whether `file` names an existing regular file whose owner-read
// permission bit is set. Returns false for an empty path, a missing or
// non-regular file, or any filesystem error.
//
// Only read permission is required: the file is consumed read-only, and
// demanding owner-write as well rejected read-only model files. The file is
// also queried once instead of three times, which matters because this check
// runs on every recognition call.
bool uns::OCRToolBox::CheckFile(const std::wstring& file)
{
    namespace fs = std::filesystem;
    try
    {
        if (file.empty())
            return false;
        // status() reports a missing file through the returned type rather
        // than by throwing; other errors throw and are handled below.
        const fs::file_status st = fs::status(file);
        if (!fs::is_regular_file(st))
            return false;
        return (st.permissions() & fs::perms::owner_read) != fs::perms::none;
    }
    catch (...)
    {
        return false;
    }
}
// Applies the common session settings: full graph optimization, the CPU
// memory arena and memory-pattern optimization.
void uns::OCRToolBox::InitOrtSessionOptions(Ort::SessionOptions& se_opt)
{
    se_opt.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL)
        .EnableCpuMemArena()  // enable the memory pool
        .EnableMemPattern();  // enable memory-pattern optimization
}
bool uns::OCRToolBox::AutoSelectEP(const OrtApi* ort, Ort::SessionOptions& se_opt, bool& fallback_to_cpu)
{
fallback_to_cpu = false;
if (G_OCRConfig.GetGPUUsage() == OCRConfig::GPUUsage::CPUOnly)
return true;
try
{
OrtStatusPtr status = OrtSessionOptionsAppendExecutionProvider_CUDA(se_opt, 0);
if (status)
{
ort->ReleaseStatus(status);
if (G_OCRConfig.GetGPUUsage() == OCRConfig::GPUUsage::ForceGPU)
return false;
se_opt = Ort::SessionOptions(); //Reset Session Options (Fallback to CPU)
fallback_to_cpu = true;
return true;
}
else
return true; //GPU Enable Successful
}
catch (const Ort::Exception&)
{
se_opt = Ort::SessionOptions(); //Reset Session Options (Fallback to CPU)
fallback_to_cpu = true;
return true;
}
catch (...)
{
return false;
}
return false;
}
// Reads the names of the session's first input and first output.
// All four containers are cleared first; with a null session they stay empty.
// input_ns / output_ns own the name strings. input_names / output_names hold
// c_str() pointers into those strings, so they are valid only while the
// storage vectors are left unmodified.
void uns::OCRToolBox::GetInputOutputNames(Ort::Session* ort_session, IONames& input_names, IONamesStorage& input_ns, IONames& output_names, IONamesStorage& output_ns)
{
    input_ns.clear();
    output_ns.clear();
    input_names.clear();
    output_names.clear();
    if (ort_session == nullptr)
        return;

    // Input 0: copy the allocator-owned name into our storage, then expose it.
    auto first_input = ort_session->GetInputNameAllocated(0, Ort::AllocatorWithDefaultOptions());
    input_ns.emplace_back(first_input.get());
    input_names.push_back(input_ns.back().c_str());

    // Output 0: same procedure.
    auto first_output = ort_session->GetOutputNameAllocated(0, Ort::AllocatorWithDefaultOptions());
    output_ns.emplace_back(first_output.get());
    output_names.push_back(output_ns.back().c_str());
}

@ -0,0 +1,27 @@
#pragma once
#include <map>
#include <functional>
#include <filesystem>
#include <opencv2/core/core.hpp>
#include <onnxruntime_cxx_api.h>
namespace uns
{
    // Shared container aliases.
    using VecInt = std::vector<int>;
    using VecFloat = std::vector<float>;

    // Tensor names as passed to ONNX Runtime, and the strings that own them.
    using IONames = std::vector<const char*>;
    using IONamesStorage = std::vector<std::string>;

    // Recognition result: text plus confidence; result set keyed by region index.
    using EOCR_Result = std::pair<std::wstring, float>;
    using EOCR_ResultSet = std::map<size_t, EOCR_Result>;

    // Detected text regions, each a list of polygon points.
    using EOCRD_Rects = std::vector<std::vector<cv::Point2f>>;

    /// Static helpers shared by the OCR components.
    class OCRToolBox
    {
    public:
        /// Checks that a file is usable as a model file.
        static bool CheckFile(const std::wstring& file);
        /// Applies the common ONNX Runtime session settings.
        static void InitOrtSessionOptions(Ort::SessionOptions& se_opt);
        /// Selects the execution provider; fallback_to_cpu reports a GPU-to-CPU fallback.
        static bool AutoSelectEP(const OrtApi* ort, Ort::SessionOptions& se_opt, bool& fallback_to_cpu);
        /// Reads the session's input/output tensor names into the given containers.
        static void GetInputOutputNames(Ort::Session* ort_session, IONames& input_names, IONamesStorage& input_ns, IONames& output_names, IONamesStorage& output_ns);
    };
}
Loading…
Cancel
Save