|
|
#include "pch.h"
|
|
|
#include "OCRCharset.h"
|
|
|
#include "EasyOCR_Recognizer.h"
|
|
|
|
|
|
uns::EasyOCR_Recognizer::NormalizePAD::Size3i uns::EasyOCR_Recognizer::NormalizePAD::Size3i::operator=(const Size3i& obj)
|
|
|
{
|
|
|
d0 = obj.d0;
|
|
|
d1 = obj.d1;
|
|
|
d2 = obj.d2;
|
|
|
return (*this);
|
|
|
}
|
|
|
|
|
|
uns::EasyOCR_Recognizer::NormalizePAD::NormalizePAD(Size3i max_size, const std::string& PAD_type)
|
|
|
{
|
|
|
this->max_size = max_size;
|
|
|
this->PAD_type = PAD_type;
|
|
|
max_width_half = max_size.d2 / 2; // 计算宽度的一半,用于可选操作
|
|
|
}
|
|
|
|
|
|
cv::Mat uns::EasyOCR_Recognizer::NormalizePAD::operator()(const cv::Mat& input_img) const
|
|
|
{
|
|
|
// 将原图转换为32位浮点型并归一化到[0,1]
|
|
|
cv::Mat img;
|
|
|
input_img.convertTo(img, CV_32F, 1.0 / 255); // line 10: img = toTensor
|
|
|
img = (img - 0.5f) / 0.5f; // line 11: img.sub_(0.5).div_(0.5)
|
|
|
int h = img.rows; // 获取图像高度
|
|
|
int w = img.cols; // 获取图像宽度
|
|
|
int c = img.channels(); // 获取通道数,灰度图默认为1
|
|
|
// 创建目标大小的全零Mat,类型为32F,尺寸为max_size.d1 x max_size.d2
|
|
|
cv::Mat pad_img = cv::Mat::zeros(max_size.d1, max_size.d2, CV_32FC(c)); // line 13
|
|
|
// 将原图像拷贝到pad_img的左上角区域,实现右侧填充
|
|
|
img.copyTo(pad_img(cv::Rect(0, 0, w, h))); // line 14
|
|
|
// 如果目标宽度大于原图宽度,则使用最后一列像素进行扩展填充
|
|
|
if (max_size.d2 != w)
|
|
|
{ // line 15
|
|
|
cv::Mat last_col = img.col(w - 1);
|
|
|
cv::Mat border;
|
|
|
cv::repeat(last_col, 1, max_size.d2 - w, border); // 重复最后一列填充
|
|
|
border.copyTo(pad_img(cv::Rect(w, 0, max_size.d2 - w, h)));
|
|
|
}
|
|
|
return pad_img; // 返回处理后的浮点张量
|
|
|
}
|
|
|
|
|
|
cv::Mat uns::EasyOCR_Recognizer::AlignCollate::AdjustContrastGrey(const cv::Mat& img_in, double target) const
|
|
|
{
|
|
|
double contrast;
|
|
|
int high, low;
|
|
|
ContrastGrey(img_in, contrast, high, low);
|
|
|
cv::Mat img = img_in.clone();
|
|
|
if (contrast < target)
|
|
|
{
|
|
|
cv::Mat img_i;
|
|
|
img.convertTo(img_i, CV_32S);
|
|
|
double ratio = 200.0 / std::max(10, high - low);
|
|
|
img_i = (img_i - low + 25) * ratio;
|
|
|
// 将像素值限制在[0,255]范围,并转换回8位
|
|
|
img_i.forEach<int>([] (int& pixel, const int*)
|
|
|
{
|
|
|
pixel = std::clamp(pixel, 0, 255);
|
|
|
});
|
|
|
img_i.convertTo(img, CV_8U);
|
|
|
}
|
|
|
return img;
|
|
|
}
|
|
|
|
|
|
void uns::EasyOCR_Recognizer::AlignCollate::ContrastGrey(const cv::Mat& img, double& contrast, int& high, int& low) const
|
|
|
{
|
|
|
// 将Mat图像数据复制到一个连续的vector<int>中,以便排序
|
|
|
std::vector<int> pixels;
|
|
|
pixels.reserve(img.rows * img.cols); // 预分配空间以提高效率
|
|
|
for (int i = 0; i < img.rows; ++i)
|
|
|
{
|
|
|
const uchar* row_ptr = img.ptr<uchar>(i);
|
|
|
for (int j = 0; j < img.cols; ++j)
|
|
|
pixels.push_back(static_cast<int>(row_ptr[j]));
|
|
|
}
|
|
|
// 对像素值进行排序,便于获取百分位数
|
|
|
std::sort(pixels.begin(), pixels.end());
|
|
|
// 计算90%的索引位置,与Python np.percentile保持一致
|
|
|
int idx90 = static_cast<int>(0.9 * (pixels.size() - 1));
|
|
|
int idx10 = static_cast<int>(0.1 * (pixels.size() - 1));
|
|
|
high = pixels[idx90];
|
|
|
low = pixels[idx10];
|
|
|
// 计算contrast: (high - low) / max(10, high + low)
|
|
|
contrast = double(high - low) / double(std::max(10, high + low));
|
|
|
}
|
|
|
|
|
|
uns::EasyOCR_Recognizer::AlignCollate::AlignCollate(int imgH, int imgW, bool keep_ratio_with_pad, double adjust_contrast)
|
|
|
{
|
|
|
this->imgH = imgH;
|
|
|
this->imgW = imgW;
|
|
|
this->adjust_contrast = adjust_contrast;
|
|
|
this->keep_ratio_with_pad = keep_ratio_with_pad;
|
|
|
}
|
|
|
|
|
|
cv::Mat uns::EasyOCR_Recognizer::AlignCollate::operator()(const std::vector<cv::Mat>& batch) const
|
|
|
{
|
|
|
std::vector<cv::Mat> resized_images;
|
|
|
|
|
|
// 创建NormalizePAD实例,用于归一化和填充
|
|
|
NormalizePAD transform({ 1, imgH, imgW });
|
|
|
|
|
|
for (const cv::Mat& image : batch)
|
|
|
{
|
|
|
cv::Mat working;
|
|
|
if (adjust_contrast > 0)
|
|
|
{
|
|
|
cv::Mat grey;
|
|
|
if (image.channels() > 1)
|
|
|
cv::cvtColor(image, grey, cv::COLOR_BGR2GRAY);
|
|
|
else
|
|
|
grey = image;
|
|
|
working = AdjustContrastGrey(grey, adjust_contrast);
|
|
|
}
|
|
|
else
|
|
|
working = image;
|
|
|
int w = working.cols;
|
|
|
int h = working.rows;
|
|
|
double ratio = double(w) / h;
|
|
|
int resized_w = static_cast<int>(std::ceil(imgH * ratio));
|
|
|
if (resized_w > imgW)
|
|
|
resized_w = imgW;
|
|
|
cv::Mat resized;
|
|
|
cv::resize(working, resized, cv::Size(resized_w, imgH), 0, 0, cv::INTER_CUBIC);
|
|
|
cv::Mat tensor = transform(resized);
|
|
|
resized_images.push_back(tensor);
|
|
|
}
|
|
|
cv::Mat blob;
|
|
|
cv::dnn::blobFromImages(resized_images, blob);
|
|
|
return blob;
|
|
|
}
|
|
|
|
|
|
float uns::EasyOCR_Recognizer::CustomMean(const VecFloat& x)
|
|
|
{
|
|
|
size_t N = x.size();
|
|
|
if (N == 0)
|
|
|
return 0.0f;
|
|
|
// 1. 计算所有元素的乘积
|
|
|
double prod = 1.0;
|
|
|
for (float v : x)
|
|
|
if (v != 0)
|
|
|
prod *= static_cast<double>(v);
|
|
|
// 2. 计算指数 2.0 / sqrt(N)
|
|
|
double exponent = 2.0 / std::sqrt(static_cast<double>(N));
|
|
|
// 3. 返回 prod 的 exponent 次幂
|
|
|
return static_cast<float>(std::pow(prod, exponent));
|
|
|
}
|
|
|
|
|
|
cv::Mat uns::EasyOCR_Recognizer::Preprocess(const cv::Mat& img) const
|
|
|
{
|
|
|
if (img.empty())
|
|
|
return {}; //此处不适合抛出异常,使用空图像终止后级的处理即可
|
|
|
cv::Mat gray;
|
|
|
int ch = img.channels();
|
|
|
// case 2: BGR 彩色图(3 通道)
|
|
|
if (ch == 3)
|
|
|
cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
|
|
|
// case 3: RGBA 彩色图(4 通道)
|
|
|
else if (ch == 4)
|
|
|
{
|
|
|
// 去掉 alpha 通道,把 BGRA → GRAY
|
|
|
cv::Mat bgr;
|
|
|
cv::cvtColor(img, gray, cv::COLOR_BGRA2GRAY);
|
|
|
}
|
|
|
else // image 本身可能是 (h×w) 或者 (h×w×1),对我们来说都当灰度处理
|
|
|
gray = img;
|
|
|
int width = gray.cols;
|
|
|
int height = gray.rows;
|
|
|
int model_height = 64, model_width = 0;
|
|
|
float ratio = static_cast<float>(width) / static_cast<float>(height);
|
|
|
cv::Mat resized;
|
|
|
if (ratio < 1.0f)
|
|
|
{
|
|
|
// 垂直文本情况,使用 calculate_ratio 保证高度为 model_height
|
|
|
float adj_ratio = CalculateRatio(width, height);
|
|
|
model_width = static_cast<int>(model_height * adj_ratio);
|
|
|
cv::resize(gray, resized, cv::Size(model_height, model_width), 0, 0, cv::INTER_LINEAR);
|
|
|
ratio = adj_ratio;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
// 横向文本情况,高度为 model_height
|
|
|
model_width = static_cast<int>(model_height * ratio);
|
|
|
cv::resize(gray, resized, cv::Size(model_width, model_height), 0, 0, cv::INTER_LINEAR);
|
|
|
}
|
|
|
AlignCollate alignCollate(model_height, model_width, true, 0.5);
|
|
|
return alignCollate({ resized });
|
|
|
}
|
|
|
|
|
|
float uns::EasyOCR_Recognizer::CalculateRatio(int width, int height) const
|
|
|
{
|
|
|
float ratio = static_cast<float>(width) / static_cast<float>(height);
|
|
|
if (ratio < 1.0f)
|
|
|
ratio = 1.0f / ratio;
|
|
|
return ratio;
|
|
|
}
|
|
|
|
|
|
uns::VecFloat uns::EasyOCR_Recognizer::SoftMAX(const float* logits, int C) const
|
|
|
{
|
|
|
// 找到最大值以稳定数值
|
|
|
float m = logits[0];
|
|
|
for (int i = 1; i < C; ++i)
|
|
|
m = std::max(m, logits[i]);
|
|
|
// 计算 exp(logit - m)
|
|
|
std::vector<float> exps(C);
|
|
|
float sum = 0.f;
|
|
|
for (int i = 0; i < C; ++i)
|
|
|
{
|
|
|
exps[i] = std::exp(logits[i] - m);
|
|
|
sum += exps[i];
|
|
|
}
|
|
|
// 归一化
|
|
|
for (int i = 0; i < C; ++i)
|
|
|
exps[i] /= (sum > 1e-6f ? sum : 1e-6f);
|
|
|
return exps;
|
|
|
}
|
|
|
|
|
|
void uns::EasyOCR_Recognizer::PostprocessONNXOutput(const Ort::Value& outputs, int N, int T, int C, VecInt& out_indices, VecFloat& out_probs, const VecInt ignore_idx)
|
|
|
{
|
|
|
// 指针访问底层数据
|
|
|
const float* data = outputs.GetTensorData<float>();
|
|
|
out_indices.clear();
|
|
|
out_probs.clear();
|
|
|
// 临时存储每步概率
|
|
|
std::vector<float> probs;
|
|
|
probs.reserve(C);
|
|
|
// 遍历每个样本、每个时间步
|
|
|
for (int n = 0; n < N; ++n)
|
|
|
{
|
|
|
for (int t = 0; t < T; ++t)
|
|
|
{
|
|
|
// logits 起始位置: ((n * T) + t) * C
|
|
|
const float* logits = data + ((size_t)n * T + t) * C;
|
|
|
// 1) Softmax
|
|
|
probs = SoftMAX(logits, C);
|
|
|
// 2) 忽略 ignore_idx
|
|
|
if (!ignore_idx.empty())
|
|
|
for (const auto& idx : ignore_idx)
|
|
|
probs[idx] = 0.f;
|
|
|
// 3) 再次归一化
|
|
|
float sum = 0.f;
|
|
|
for (int c = 0; c < C; ++c)
|
|
|
sum += probs[c];
|
|
|
if (sum > 1e-6f)
|
|
|
{
|
|
|
for (int c = 0; c < C; ++c)
|
|
|
probs[c] /= sum;
|
|
|
}
|
|
|
// 4) 取最大索引
|
|
|
int best = 0;
|
|
|
float best_prob = 0.0f;
|
|
|
for (int c = 1; c < C; ++c)
|
|
|
{
|
|
|
if (probs[c] > probs[best])
|
|
|
{
|
|
|
best = c;
|
|
|
best_prob = probs[c];
|
|
|
}
|
|
|
}
|
|
|
out_indices.push_back(best);
|
|
|
out_probs.push_back(best_prob);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
uns::EasyOCR_Recognizer::EasyOCR_Recognizer()
|
|
|
{
|
|
|
ort_inited = false;
|
|
|
ort_cpu_session = nullptr;
|
|
|
model_path = G_OCRConfig.GetRecognizeModelPath();
|
|
|
ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
|
|
|
}
|
|
|
|
|
|
bool uns::EasyOCR_Recognizer::Init()
|
|
|
{
|
|
|
if (ort_inited)
|
|
|
return true;
|
|
|
if (!RecheckModelInfo())
|
|
|
return false;
|
|
|
try
|
|
|
{
|
|
|
ort_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "EasyOCR_Recognizer");
|
|
|
bool fallback_to_cpu = false;
|
|
|
if (!OCRToolBox::AutoSelectEP(ort, ort_session_options, fallback_to_cpu))
|
|
|
return false;
|
|
|
OCRToolBox::InitOrtSessionOptions(ort_session_options);
|
|
|
if ((G_OCRConfig.GetGPUUsage() == easyocr::GPUUsage::CPUOnly) || fallback_to_cpu) //使用CPU则初始化cpu session
|
|
|
{
|
|
|
ort_cpu_session = new Ort::Session(ort_env, model_path.c_str(), ort_session_options);
|
|
|
//通过CPU session获取输入输出名
|
|
|
OCRToolBox::GetInputOutputNames(ort_cpu_session, input_names, input_ns, output_names, output_ns);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
//通过临时session获取输入输出名(CUDA:线程不安全)
|
|
|
Ort::Session ort_session(ort_env, model_path.c_str(), ort_session_options);
|
|
|
OCRToolBox::GetInputOutputNames(&ort_session, input_names, input_ns, output_names, output_ns);
|
|
|
}
|
|
|
ort_inited = true;
|
|
|
return true;
|
|
|
}
|
|
|
catch (...)
|
|
|
{
|
|
|
return false;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
bool uns::EasyOCR_Recognizer::UnInit()
|
|
|
{
|
|
|
try
|
|
|
{
|
|
|
if (ort_cpu_session != nullptr)
|
|
|
delete ort_cpu_session;
|
|
|
ort_cpu_session = nullptr;
|
|
|
return true;
|
|
|
}
|
|
|
catch (...)
|
|
|
{
|
|
|
return false;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
bool uns::EasyOCR_Recognizer::RecheckModelInfo()
|
|
|
{
|
|
|
if (model_path.empty())
|
|
|
model_path = G_OCRConfig.GetRecognizeModelPath();
|
|
|
return OCRToolBox::CheckFile(model_path);
|
|
|
}
|
|
|
|
|
|
uns::EOCR_Result uns::EasyOCR_Recognizer::operator()(const cv::Mat& image)
|
|
|
{
|
|
|
try
|
|
|
{
|
|
|
if (!RecheckModelInfo())
|
|
|
return { L"", -1.0f };
|
|
|
cv::Mat input = Preprocess(image);
|
|
|
if (input.empty())
|
|
|
return { L"", 0.0f };
|
|
|
std::array<int64_t, 4> inputShape = { 1, 1, input.size[2], input.size[3] };
|
|
|
Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memInfo, input.ptr<float>(), input.total(), inputShape.data(), inputShape.size());
|
|
|
auto outputs = ((ort_cpu_session != nullptr) ? ort_cpu_session->Run(Ort::RunOptions{nullptr}, input_names.data(), & inputTensor, 1, output_names.data(), 1) : Ort::Session(ort_env, model_path.c_str(), ort_session_options).Run(Ort::RunOptions{nullptr}, input_names.data(), & inputTensor, 1, output_names.data(), 1));
|
|
|
// 输出 shape: [1, T, C]
|
|
|
auto& outVal = outputs.front();
|
|
|
auto info = outVal.GetTensorTypeAndShapeInfo();
|
|
|
auto shape = info.GetShape(); // {1, T, C}
|
|
|
int N = (int)shape[0], T = (int)shape[1], C = (int)shape[2];
|
|
|
float* data = outVal.GetTensorMutableData<float>();
|
|
|
// greedy pick & softmax
|
|
|
std::vector<int> indices(T);
|
|
|
std::vector<float> maxProbs(T);
|
|
|
PostprocessONNXOutput(outputs[0], N, T, C, indices, maxProbs);
|
|
|
// 解码
|
|
|
std::wstring text = OCRCharset::GetString(indices);
|
|
|
// 置信度
|
|
|
float conf = CustomMean(maxProbs);
|
|
|
return { text, conf };
|
|
|
}
|
|
|
catch (...)
|
|
|
{
|
|
|
return { L"", -2.0f };
|
|
|
}
|
|
|
}
|
|
|
|
|
|
uns::EOCR_ResultSet uns::EasyOCR_Recognizer::operator()(const cv::Mat& full_image, const EOCRD_Rects& rects)
|
|
|
{
|
|
|
if (!RecheckModelInfo())
|
|
|
return {};
|
|
|
try
|
|
|
{
|
|
|
EOCR_ResultSet result_set;
|
|
|
for (size_t i = 0; i < rects.size(); ++i)
|
|
|
{
|
|
|
// 将多边形转为最小外接矩形并裁剪
|
|
|
cv::Rect rect = cv::boundingRect(rects[i]);
|
|
|
rect &= cv::Rect(0, 0, full_image.cols, full_image.rows); // 裁剪到图像范围
|
|
|
cv::Mat crop = full_image(rect);
|
|
|
if (crop.empty())
|
|
|
continue;
|
|
|
auto [text, conf] = (*this)(crop);
|
|
|
result_set.insert({ i, { text, conf, rect } });
|
|
|
}
|
|
|
return result_set;
|
|
|
}
|
|
|
catch (...)
|
|
|
{
|
|
|
return {};
|
|
|
}
|
|
|
}
|