You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

234 lines
8.4 KiB
Python

import cv2
import numpy as np
from .base.base import HamburgerABC
from hyperlpr3.common.tools_process import cost
import math
from hyperlpr3.common.tokenize import token
def encode_images(image: np.ndarray, max_wh_ratio, target_shape, limited_max_width=160, limited_min_width=48):
    """Resize, normalise and right-pad a 3-channel image into a CHW float32 array.

    Args:
        image: Array indexed as (height, width, channel); the channel axis must
            have length 3.
        max_wh_ratio: Requested width/height ratio of the output canvas. It is
            raised to ``target_shape[1] / target_shape[0]`` when smaller.
        target_shape: ``(height, width)`` pair; the height is used as-is, the
            width only contributes the minimum ratio above.
        limited_max_width: Upper clamp for the canvas width.
        limited_min_width: Lower clamp for both the canvas width and the width
            the image is resized to.

    Returns:
        A ``float32`` array of shape ``(3, height, canvas_width)``. The resized
        image, scaled with ``(x - 127.5) / 127.5``, fills the left part; the
        remaining columns are zero.
    """
    channels = 3
    target_h, target_w = target_shape
    assert channels == image.shape[2]

    # Canvas width follows the requested ratio, clamped to the allowed range.
    max_wh_ratio = max(max_wh_ratio, target_w / target_h)
    canvas_w = int(target_h * max_wh_ratio)
    canvas_w = max(min(canvas_w, limited_max_width), limited_min_width)

    # Width the image would get at the target height while keeping its own
    # aspect ratio, never narrower than the minimum and never wider than the canvas.
    src_h, src_w = image.shape[:2]
    aspect = src_w / float(src_h)
    scaled_w = max(math.ceil(target_h * aspect), limited_min_width)
    resized_w = canvas_w if scaled_w > canvas_w else int(scaled_w)

    resized = cv2.resize(image, (resized_w, target_h)).astype('float32')
    # HWC -> CHW, then centre on 127.5 and scale by 127.5.
    normalised = (resized.transpose((2, 0, 1)) - 127.5) / 127.5

    canvas = np.zeros((channels, target_h, canvas_w), dtype=np.float32)
    canvas[:, :, 0:resized_w] = normalised
    return canvas
def get_ignored_tokens():
    """Return a new list of the token indices that decoding must skip."""
    ctc_blank_index = 0  # for ctc blank
    return [ctc_blank_index]
class PPRCNNRecognitionMNN(HamburgerABC):
    """Recognition backend that runs inference through ``MNNAdapter``."""

    def __init__(self, mnn_path, character_file, *args, **kwargs):
        """Create the MNN session and bind the character table.

        Args:
            mnn_path: Model path handed to ``MNNAdapter``.
            character_file: Accepted but not read here; the character table is
                the ``token`` object imported from ``hyperlpr3.common.tokenize``.
            *args, **kwargs: Forwarded to ``HamburgerABC.__init__``.
        """
        # Imported here rather than at module level, so the adapter module is
        # only loaded when this class is instantiated.
        from hyperlpr3.common.mnn_adapt import MNNAdapter
        super().__init__(*args, **kwargs)
        # NOTE(review): self.input_size is not set in this class; presumably the
        # base-class constructor provides it as (height, width) - confirm.
        self.input_shape = (1, 3, self.input_size[0], self.input_size[1])
        self.session = MNNAdapter(mnn_path, input_shape=self.input_shape, outputs_name=['output'])
        self.character_list = token

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert text-index sequences into ``(text, confidence)`` pairs.

        Args:
            text_index: One sequence of token indices per batch item.
            text_prob: Optional per-step scores laid out like ``text_index``;
                when omitted every kept step counts as 1.
            is_remove_duplicate: When true, a step is dropped if it carries the
                same index as the step immediately before it.

        Returns:
            A list with one ``(text, confidence)`` tuple per batch item, where
            confidence is the mean score of the kept steps, or ``0.0`` when no
            step was kept.
        """
        ignored_tokens = get_ignored_tokens()
        result_list = []
        for batch_idx, indices in enumerate(text_index):
            char_list = []
            conf_list = []
            for idx, token_id in enumerate(indices):
                if token_id in ignored_tokens:
                    continue
                # only for predict: collapse runs of an identical index
                if is_remove_duplicate and idx > 0 and indices[idx - 1] == token_id:
                    continue
                char_list.append(self.character_list[int(token_id)])
                conf_list.append(text_prob[batch_idx][idx] if text_prob is not None else 1)
            # np.mean([]) would yield nan plus a RuntimeWarning; report 0.0 when
            # nothing was decoded.
            confidence = np.mean(conf_list) if conf_list else 0.0
            result_list.append((''.join(char_list), confidence))
        return result_list

    def _run_session(self, data):
        """Run the MNN session and return the output with shape (1, 1, 40, 6625)."""
        output = self.session.inference(data)
        # NOTE(review): the output dimensions are hard-coded; presumably
        # (time steps, character classes) for the shipped model - confirm.
        output = output.reshape(40, 6625)
        # Wrapping in a list adds one leading axis, expand_dims adds another.
        output = np.expand_dims([output], 0)
        return output

    def _postprocess(self, data):
        """Pick the best-scoring index per step and decode the first batch item."""
        prod = data[0]
        argmax = np.argmax(prod, axis=2)
        rmax = np.max(prod, axis=2)
        result = self.decode(argmax, rmax, is_remove_duplicate=True)
        return result[0]

    def _preprocess(self, image):
        """Encode one 3-dimensional image into a batch of one network input."""
        assert len(image.shape) == 3, (
            "The dimensions of the input image object do not match. The input supports a single "
            "image. "
        )
        h, w, _ = image.shape
        wh_ratio = w * 1.0 / h
        data = encode_images(image, wh_ratio, self.input_size)
        # Prepend the batch axis.
        data = np.expand_dims(data, 0)
        return data
class PPRCNNRecognitionORT(HamburgerABC):
    """Recognition backend that runs inference through ONNX Runtime."""

    def __init__(self, onnx_path, token_dict=token, *args, **kwargs):
        """Create the ONNX Runtime session and read its input/output metadata.

        Args:
            onnx_path: Model path handed to ``onnxruntime.InferenceSession``.
            token_dict: Indexable character table used by ``decode``; defaults
                to the ``token`` object imported at module level.
            *args, **kwargs: Forwarded to ``HamburgerABC.__init__``.
        """
        # Imported here rather than at module level, so onnxruntime is only
        # loaded when this class is instantiated.
        import onnxruntime as ort
        super().__init__(*args, **kwargs)
        self.session = ort.InferenceSession(onnx_path, None)
        self.input_config = self.session.get_inputs()[0]
        self.output_config = self.session.get_outputs()[0]
        # Overrides whatever the base class set: everything after the first two
        # entries of the model's declared input shape.
        # NOTE(review): assumes an NCHW input so this is (height, width) - confirm.
        self.input_size = self.input_config.shape[2:]
        self.character_list = token_dict

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert text-index sequences into ``(text, confidence)`` pairs.

        Args:
            text_index: One sequence of token indices per batch item.
            text_prob: Optional per-step scores laid out like ``text_index``;
                when omitted every kept step counts as 1.
            is_remove_duplicate: When true, a step is dropped if it carries the
                same index as the step immediately before it.

        Returns:
            A list with one ``(text, confidence)`` tuple per batch item, where
            confidence is the mean score of the kept steps, or ``0.0`` when no
            step was kept.
        """
        ignored_tokens = get_ignored_tokens()
        result_list = []
        for batch_idx, indices in enumerate(text_index):
            char_list = []
            conf_list = []
            for idx, token_id in enumerate(indices):
                if token_id in ignored_tokens:
                    continue
                # only for predict: collapse runs of an identical index
                if is_remove_duplicate and idx > 0 and indices[idx - 1] == token_id:
                    continue
                char_list.append(self.character_list[int(token_id)])
                conf_list.append(text_prob[batch_idx][idx] if text_prob is not None else 1)
            # np.mean([]) would yield nan plus a RuntimeWarning; report 0.0 when
            # nothing was decoded, matching the empty result of _postprocess.
            confidence = np.mean(conf_list) if conf_list else 0.0
            result_list.append((''.join(char_list), confidence))
        return result_list

    # @cost("Recognition")
    def _run_session(self, data) -> list:
        """Run the session on ``data`` and return the list of requested outputs."""
        result = self.session.run([self.output_config.name], {self.input_config.name: data})
        return result

    def _postprocess(self, data) -> tuple:
        """Decode the first output; return ``('', 0.0)`` when ``data`` is empty."""
        if not data:
            return '', 0.0
        prod = data[0]
        argmax = np.argmax(prod, axis=2)
        rmax = np.max(prod, axis=2)
        result = self.decode(argmax, rmax, is_remove_duplicate=True)
        return result[0]

    def _preprocess(self, image) -> np.ndarray:
        """Encode one 3-dimensional image into a batch of one network input."""
        assert len(image.shape) == 3, (
            "The dimensions of the input image object do not match. The input supports a single "
            "image. "
        )
        h, w, _ = image.shape
        wh_ratio = w * 1.0 / h
        data = encode_images(image, wh_ratio, self.input_size)
        # Prepend the batch axis.
        data = np.expand_dims(data, 0)
        return data
class PPRCNNRecognitionDNN(HamburgerABC):
    """Recognition backend that runs an ONNX model through ``cv2.dnn``."""

    def __init__(self, onnx_path, character_file, *args, **kwargs):
        """Load the network and bind the character table.

        Args:
            onnx_path: Model path handed to ``cv2.dnn.readNetFromONNX``.
            character_file: Accepted but not read here; the character table is
                the ``token`` object imported from ``hyperlpr3.common.tokenize``.
            *args, **kwargs: Forwarded to ``HamburgerABC.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.session = cv2.dnn.readNetFromONNX(onnx_path)
        # NOTE(review): self.input_size is not set in this class; presumably the
        # base-class constructor provides it as (height, width) - confirm.
        self.input_shape = (1, 3, self.input_size[0], self.input_size[1])
        self.character_list = token

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert text-index sequences into ``(text, confidence)`` pairs.

        Args:
            text_index: One sequence of token indices per batch item.
            text_prob: Optional per-step scores laid out like ``text_index``;
                when omitted every kept step counts as 1.
            is_remove_duplicate: When true, a step is dropped if it carries the
                same index as the step immediately before it.

        Returns:
            A list with one ``(text, confidence)`` tuple per batch item, where
            confidence is the mean score of the kept steps, or ``0.0`` when no
            step was kept.
        """
        ignored_tokens = get_ignored_tokens()
        result_list = []
        for batch_idx, indices in enumerate(text_index):
            char_list = []
            conf_list = []
            for idx, token_id in enumerate(indices):
                if token_id in ignored_tokens:
                    continue
                # only for predict: collapse runs of an identical index
                if is_remove_duplicate and idx > 0 and indices[idx - 1] == token_id:
                    continue
                char_list.append(self.character_list[int(token_id)])
                conf_list.append(text_prob[batch_idx][idx] if text_prob is not None else 1)
            # np.mean([]) would yield nan plus a RuntimeWarning; report 0.0 when
            # nothing was decoded.
            confidence = np.mean(conf_list) if conf_list else 0.0
            result_list.append((''.join(char_list), confidence))
        return result_list

    def _run_session(self, data):
        """Feed ``data`` to the network and return its output with one extra leading axis."""
        self.session.setInput(data)
        outputs = self.session.forward()
        # _postprocess indexes [0] and then reduces over axis 2, so the forward
        # result gets one more leading axis here.
        outputs = np.expand_dims(outputs, 0)
        return outputs

    def _postprocess(self, data):
        """Pick the best-scoring index per step and decode the first batch item."""
        prod = data[0]
        argmax = np.argmax(prod, axis=2)
        rmax = np.max(prod, axis=2)
        result = self.decode(argmax, rmax, is_remove_duplicate=True)
        return result[0]

    def _preprocess(self, image):
        """Encode one 3-dimensional image into a batch of one network input."""
        assert len(image.shape) == 3, (
            "The dimensions of the input image object do not match. The input supports a single "
            "image. "
        )
        h, w, _ = image.shape
        wh_ratio = w * 1.0 / h
        data = encode_images(image, wh_ratio, self.input_size)
        # Prepend the batch axis.
        data = np.expand_dims(data, 0)
        return data