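"""License plate text recognition backends for HyperLPR3.

Provides three interchangeable recognition heads (MNN, ONNX Runtime and OpenCV DNN)
that share the same image encoding and CTC greedy decoding logic.
"""
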
import math

import cv2
import numpy as np

from .base.base import HamburgerABC
from hyperlpr3.common.tools_process import cost
from hyperlpr3.common.tokenize import token


def encode_images(image: np.ndarray, max_wh_ratio, target_shape, limited_max_width=160, limited_min_width=48):
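    """Resize, normalize and right-pad a single image into a CHW float32 tensor.

    The image is scaled to the target height (keeping its aspect ratio within the
    configured width limits), normalized to [-1, 1], and zero-padded on the right
    up to the computed width.
    """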
    imgC = 3
    imgH, imgW = target_shape
    assert imgC == image.shape[2]
    # Widen the target width according to max_wh_ratio, clamped to the supported range.
    max_wh_ratio = max(max_wh_ratio, imgW / imgH)
    imgW = int(imgH * max_wh_ratio)
    imgW = max(min(imgW, limited_max_width), limited_min_width)
    h, w = image.shape[:2]
    ratio = w / float(h)
    ratio_imgH = math.ceil(imgH * ratio)
    ratio_imgH = max(ratio_imgH, limited_min_width)
    if ratio_imgH > imgW:
        resized_w = imgW
    else:
        resized_w = int(ratio_imgH)
    resized_image = cv2.resize(image, (resized_w, imgH))
    # Normalize to [-1, 1] and convert HWC -> CHW.
    resized_image = resized_image.astype('float32')
    resized_image = (resized_image.transpose((2, 0, 1)) - 127.5) / 127.5
    # Right-pad with zeros so every sample reaches the same width.
    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
    padding_im[:, :, 0:resized_w] = resized_image
    return padding_im

def get_ignored_tokens():
    return [0]  # for ctc blank

class PPRCNNRecognitionMNN(HamburgerABC):
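    """CTC-based plate text recognizer that runs the recognition model through MNN."""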

    def __init__(self, mnn_path, character_file, *args, **kwargs):
        # Imported lazily so MNN is only required when this backend is used.
        from hyperlpr3.common.mnn_adapt import MNNAdapter
        super().__init__(*args, **kwargs)
        self.input_shape = (1, 3, self.input_size[0], self.input_size[1])
        self.session = MNNAdapter(mnn_path, input_shape=self.input_shape, outputs_name=['output'])
        self.character_list = token

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert index sequences into text labels with a mean confidence per string."""
        result_list = []
        ignored_tokens = get_ignored_tokens()
        batch_size = len(text_index)
        for batch_idx in range(batch_size):
            char_list = []
            conf_list = []
            for idx in range(len(text_index[batch_idx])):
                if text_index[batch_idx][idx] in ignored_tokens:
                    continue
                if is_remove_duplicate:
                    # only for predict
                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]:
                        continue
                char_list.append(self.character_list[int(text_index[batch_idx][idx])])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)
            text = ''.join(char_list)
            result_list.append((text, np.mean(conf_list)))
        return result_list

    def _run_session(self, data):
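        """Run MNN inference and reshape the flat output to (1, 1, 40, 6625)."""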
        output = self.session.inference(data)
        output = output.reshape(40, 6625)
        output = np.expand_dims([output], 0)
        return output

    def _postprocess(self, data):
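        """Greedy CTC decode: take the argmax class per time step, then drop blanks and repeats."""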
        prod = data[0]
        argmax = np.argmax(prod, axis=2)
        rmax = np.max(prod, axis=2)
        result = self.decode(argmax, rmax, is_remove_duplicate=True)
        return result[0]

    def _preprocess(self, image):
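        """Check that the input is a single H x W x C image and encode it into a batched tensor."""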
        assert len(image.shape) == 3, "The input must be a single image with shape (H, W, C)."
        h, w, _ = image.shape
        wh_ratio = w * 1.0 / h
        data = encode_images(image, wh_ratio, self.input_size)
        data = np.expand_dims(data, 0)
        return data


class PPRCNNRecognitionORT(HamburgerABC):
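    """CTC-based plate text recognizer backed by ONNX Runtime; the input size is read from the model."""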

    def __init__(self, onnx_path, token_dict=token, *args, **kwargs):
        import onnxruntime as ort
        super().__init__(*args, **kwargs)
        self.session = ort.InferenceSession(onnx_path, None)
        self.input_config = self.session.get_inputs()[0]
        self.output_config = self.session.get_outputs()[0]
        # Take the expected (height, width) from the model's NCHW input shape.
        self.input_size = self.input_config.shape[2:]
        self.character_list = token_dict

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert index sequences into text labels with a mean confidence per string."""
        result_list = []
        ignored_tokens = get_ignored_tokens()
        batch_size = len(text_index)
        for batch_idx in range(batch_size):
            char_list = []
            conf_list = []
            for idx in range(len(text_index[batch_idx])):
                if text_index[batch_idx][idx] in ignored_tokens:
                    continue
                if is_remove_duplicate:
                    # only for predict
                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]:
                        continue
                char_list.append(self.character_list[int(text_index[batch_idx][idx])])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)
            text = ''.join(char_list)
            result_list.append((text, np.mean(conf_list)))
        return result_list

    # @cost("Recognition")
    def _run_session(self, data) -> np.ndarray:
        result = self.session.run([self.output_config.name], {self.input_config.name: data})
        return result

    def _postprocess(self, data) -> tuple:
        if data:
            prod = data[0]
            argmax = np.argmax(prod, axis=2)
            rmax = np.max(prod, axis=2)
            result = self.decode(argmax, rmax, is_remove_duplicate=True)
            return result[0]
        else:
            return '', 0.0

    def _preprocess(self, image) -> np.ndarray:
        assert len(image.shape) == 3, "The input must be a single image with shape (H, W, C)."
        h, w, _ = image.shape
        wh_ratio = w * 1.0 / h
        data = encode_images(image, wh_ratio, self.input_size)
        data = np.expand_dims(data, 0)
        return data


class PPRCNNRecognitionDNN(HamburgerABC):
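    """CTC-based plate text recognizer backed by OpenCV's DNN module."""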

    def __init__(self, onnx_path, character_file, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.session = cv2.dnn.readNetFromONNX(onnx_path)
        self.input_shape = (1, 3, self.input_size[0], self.input_size[1])
        self.character_list = token

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert index sequences into text labels with a mean confidence per string."""
        result_list = []
        ignored_tokens = get_ignored_tokens()
        batch_size = len(text_index)
        for batch_idx in range(batch_size):
            char_list = []
            conf_list = []
            for idx in range(len(text_index[batch_idx])):
                if text_index[batch_idx][idx] in ignored_tokens:
                    continue
                if is_remove_duplicate:
                    # only for predict
                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[batch_idx][idx]:
                        continue
                char_list.append(self.character_list[int(text_index[batch_idx][idx])])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)
            text = ''.join(char_list)
            result_list.append((text, np.mean(conf_list)))
        return result_list

    def _run_session(self, data):
        self.session.setInput(data)
        outputs = self.session.forward()
        # Wrap in an extra leading dimension to match the layout expected by _postprocess.
        outputs = np.expand_dims(outputs, 0)
        return outputs

    def _postprocess(self, data):
        prod = data[0]
        argmax = np.argmax(prod, axis=2)
        rmax = np.max(prod, axis=2)
        result = self.decode(argmax, rmax, is_remove_duplicate=True)
        return result[0]

    def _preprocess(self, image):
        assert len(image.shape) == 3, "The input must be a single image with shape (H, W, C)."
        h, w, _ = image.shape
        wh_ratio = w * 1.0 / h
        data = encode_images(image, wh_ratio, self.input_size)
        data = np.expand_dims(data, 0)
        return data