// EasyOCR-CPP/LibEasyOCR-CPP/EasyOCR_Recognizer.h

#pragma once
#include <string>
#include <vector>
#include "OCRConfig.h"
#include "OCRToolBox.h"
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>

namespace uns
{
	/// <summary>
	/// EasyOCR text recognizer built on ONNX Runtime and OpenCV types.
	/// This header only declares the interface; all member definitions live outside this file.
	/// </summary>
	class EasyOCR_Recognizer
	{
	public:
		/// <summary>
		/// Callable helper configured with a maximum size and a padding side; maps an input
		/// cv::Mat to an output cv::Mat.
		/// NOTE(review): presumably normalizes the image and pads it up to max_size - confirm against the implementation.
		/// </summary>
		class NormalizePAD
		{
		public:
			struct Size3i	// Custom 3-D size structure representing channels, height and width
			{
				// NOTE(review): presumably d0 = channels, d1 = height, d2 = width (order taken from the comment above) - confirm.
				int d0, d1, d2;

				// NOTE(review): returns a copy rather than the conventional Size3i&; changing this
				// requires updating the out-of-line definition at the same time.
				Size3i operator=(const Size3i& obj);
			};

		private:
			Size3i max_size;         // Maximum size
			int max_width_half;      // Half of the width; used for optional features such as center cropping
			std::string PAD_type;    // Padding type; supports "right" or "left"

		public:
			/// <summary>
			/// Creates the helper.
			/// </summary>
			/// <param name="max_size">Maximum size</param>
			/// <param name="PAD_type">Padding type, "right" (default) or "left"</param>
			NormalizePAD(Size3i max_size, const std::string& PAD_type = "right");

		public:
			/// <summary>
			/// Applies the helper to one image.
			/// </summary>
			/// <param name="input_img">Input image (not modified; taken by const reference)</param>
			/// <returns>The resulting cv::Mat</returns>
			cv::Mat operator()(const cv::Mat& input_img) const;
		};

		/// <summary>
		/// Callable helper that turns a batch of images into a single cv::Mat, configured with a
		/// target height/width and a contrast adjustment target.
		/// </summary>
		class AlignCollate
		{
		private:
			int imgH;                    // Target image height
			int imgW;                    // Target image width
			bool keep_ratio_with_pad;    // Keep-aspect-ratio-with-padding flag (currently unused)
			double adjust_contrast;      // Contrast adjustment target value

		private:
			// Returns a contrast-adjusted image derived from img_in; target defaults to 0.4.
			// NOTE(review): the name suggests a greyscale input is expected - confirm.
			cv::Mat AdjustContrastGrey(const cv::Mat& img_in, double target = 0.4) const;
			// Reports its results through the contrast, high and low output parameters.
			// NOTE(review): presumably a contrast measure plus high/low intensity levels of img - confirm.
			void ContrastGrey(const cv::Mat& img, double& contrast, int& high, int& low) const;

		public:
			/// <summary>
			/// Creates the helper.
			/// </summary>
			/// <param name="imgH">Target image height (default 32)</param>
			/// <param name="imgW">Target image width (default 100)</param>
			/// <param name="keep_ratio_with_pad">Keep-aspect-ratio-with-padding flag (default false; currently unused)</param>
			/// <param name="adjust_contrast">Contrast adjustment target value (default 0.0)</param>
			AlignCollate(int imgH = 32, int imgW = 100, bool keep_ratio_with_pad = false, double adjust_contrast = 0.0);

		public:
			/// <summary>
			/// Applies the helper to a batch of images.
			/// </summary>
			/// <param name="batch">Input images</param>
			/// <returns>A single cv::Mat built from the batch</returns>
			cv::Mat operator()(const std::vector<cv::Mat>& batch) const;
		};

	private:
		// Pointer to the ONNX Runtime C API table.
		const OrtApi* ort;

		Ort::Env ort_env;
		// NOTE(review): raw pointer and no destructor is declared in this class; who creates and
		// releases the session is not visible here (presumably Init()/UnInit()) - confirm.
		Ort::Session* ort_cpu_session;
		Ort::SessionOptions ort_session_options;

		bool ort_inited;
		std::wstring model_path;

		// Model input/output names; the IONames and IONamesStorage types are declared outside this file.
		IONames input_names, output_names;
		IONamesStorage input_ns, output_ns;

	private:
		// Returns a single float computed from x.
		// NOTE(review): the exact averaging rule is defined in the implementation; unlike the
		// neighbouring helpers this one is not declared const.
		float CustomMean(const VecFloat& x);
		// Returns a preprocessed image derived from img.
		cv::Mat Preprocess(const cv::Mat& img) const;
		// Returns a ratio computed from the given width and height.
		float CalculateRatio(int width, int height) const;
		// NOTE(review): presumably applies softmax over C logit values - confirm against the implementation.
		VecFloat SoftMAX(const float* logits, int C) const;
		// Processes the ONNX output value and writes results to out_indices and out_probs.
		// ignore_idx is taken by value and defaults to empty.
		// NOTE(review): N, T, C are presumably batch size, sequence length and class count - confirm.
		void PostprocessONNXOutput(const Ort::Value& outputs, int N, int T, int C, VecInt& out_indices, VecFloat& out_probs, const VecInt ignore_idx = {});

	public:
		EasyOCR_Recognizer();

	public:
		// Lifecycle and model-information entry points; each returns a bool.
		// NOTE(review): presumably true means success - confirm against the implementation.
		bool Init();
		bool UnInit();
		bool RecheckModelInfo();
		/// <summary>
		/// EasyOCR text recognition function
		/// </summary>
		/// <param name="image">Image containing only text (3-channel BGR image)</param>
		/// <returns>The text and its confidence</returns>
		EOCR_Result operator()(const cv::Mat& image);
		/// <summary>
		/// EasyOCR text recognition function
		/// </summary>
		/// <param name="full_image">The complete image to be processed (3-channel BGR image)</param>
		/// <param name="rects">Text position rectangles detected by EasyOCR_Detector</param>
		/// <returns>A set of [text and its confidence] results</returns>
		EOCR_ResultSet operator()(const cv::Mat& full_image, const EOCRD_Rects& rects);
	};
}