From b1840920c47500db798bb995845d19c3c60d37b6 Mon Sep 17 00:00:00 2001 From: Sivakumar Mahalingam Date: Sat, 6 Apr 2024 09:01:24 +0400 Subject: [PATCH] updated cleanse fn --- scripts/Laghima.py | 71 ++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/scripts/Laghima.py b/scripts/Laghima.py index ef09890..6ccea92 100644 --- a/scripts/Laghima.py +++ b/scripts/Laghima.py @@ -7,11 +7,11 @@ # Set the Tesseract path pytesseract.pytesseract.tesseract_cmd = r'/opt/homebrew/Cellar/tesseract/5.3.4_1/bin/tesseract' - +# pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe' class Laghima: - def __init__(self, model_path): - self.interpreter = tensorflow.lite.Interpreter(model_path=os.path.abspath('./models/mrz_seg.tflite')) + def __init__(self): + self.interpreter = tensorflow.lite.Interpreter(model_path=os.path.abspath('../models/mrz_seg.tflite')) self.interpreter.allocate_tensors() self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() @@ -61,11 +61,9 @@ def _cleanse_roi(self, raw_text): selection_length = len(item) break - new_list = [item for item in input_list if len(item) >= selection_length] - - output_text = '\n'.join(new_list) + output_list = [item for item in input_list if len(item) >= selection_length] - return output_text + return output_list def _get_final_check_digit(self, input_string, input_type): if input_type == 'TD3': @@ -111,37 +109,36 @@ def read_mrz(self, image_path): return self._parse_mrz(mrz_text) def _parse_mrz(self, mrz_text): - mrz_lines = mrz_text.strip().split('\n') - if len(mrz_lines) not in [2, 3]: + if len(mrz_text) not in [2, 3]: return {'status': 'FAILURE', 'message': 'Invalid MRZ format'} mrz_code_dict = {} - if len(mrz_lines) == 2: + if len(mrz_text) == 2: # add optional data field - mrz_code_dict['mrz_type'] = 'TD2' if len(mrz_lines[0]) == 36 else 'TD3' + mrz_code_dict['mrz_type'] = 
'TD2' if len(mrz_text[0]) == 36 else 'TD3' # Line 1 - mrz_code_dict['document_type'] = mrz_lines[0][:1] - mrz_code_dict['country_code'] = mrz_lines[0][2:5] - names = mrz_lines[0][5:].split('<<') + mrz_code_dict['document_type'] = mrz_text[0][:1] + mrz_code_dict['country_code'] = mrz_text[0][2:5] + names = mrz_text[0][5:].split('<<') mrz_code_dict['surname'] = names[0].replace('<', ' ') mrz_code_dict['given_name'] = names[1].replace('<', ' ') # Line 2 - mrz_code_dict['document_number'] = mrz_lines[1][0:9].replace('<', '') - if self._get_check_digit(mrz_code_dict['document_number']) != mrz_lines[1][9]: + mrz_code_dict['document_number'] = mrz_text[1][0:9].replace('<', '') + if self._get_check_digit(mrz_code_dict['document_number']) != mrz_text[1][9]: return {'status': 'FAILURE', 'message': 'document number checksum is not matching'} - mrz_code_dict['nationality'] = mrz_lines[1][10:13] - mrz_code_dict['date_of_birth'] = mrz_lines[1][13:19] - if self._get_check_digit(mrz_code_dict['date_of_birth']) != mrz_lines[1][19]: + mrz_code_dict['nationality'] = mrz_text[1][10:13] + mrz_code_dict['date_of_birth'] = mrz_text[1][13:19] + if self._get_check_digit(mrz_code_dict['date_of_birth']) != mrz_text[1][19]: return {'status': 'FAILURE', 'message': 'date of birth checksum is not matching'} mrz_code_dict['date_of_birth'] = self._format_date(mrz_code_dict['date_of_birth']) - mrz_code_dict['sex'] = mrz_lines[1][20] - mrz_code_dict['date_of_expiry'] = mrz_lines[1][21:27] - if self._get_check_digit(mrz_code_dict['date_of_expiry']) != mrz_lines[1][27]: + mrz_code_dict['sex'] = mrz_text[1][20] + mrz_code_dict['date_of_expiry'] = mrz_text[1][21:27] + if self._get_check_digit(mrz_code_dict['date_of_expiry']) != mrz_text[1][27]: return {'status': 'FAILURE', 'message': 'date of expiry checksum is not matching'} mrz_code_dict['date_of_expiry'] = self._format_date(mrz_code_dict['date_of_expiry']) - if mrz_lines[1][-1] != self._get_final_check_digit(mrz_lines[1], mrz_code_dict['type']): + if 
mrz_text[1][-1] != self._get_final_check_digit(mrz_text[1], mrz_code_dict['mrz_type']): return {'status': 'FAILURE', 'message': 'final checksum is not matching'} # Final status @@ -150,30 +147,30 @@ def _parse_mrz(self, mrz_text): mrz_code_dict['mrz_type'] = 'TD1' # Line 1 - mrz_code_dict['document_type'] = mrz_lines[0][:2].replace('<', ' ') - mrz_code_dict['country_code'] = mrz_lines[0][2:5] - mrz_code_dict['document_number'] = mrz_lines[0][5:14] - if self._get_check_digit(mrz_code_dict['document_number']) != mrz_lines[0][14]: + mrz_code_dict['document_type'] = mrz_text[0][:2].replace('<', ' ') + mrz_code_dict['country_code'] = mrz_text[0][2:5] + mrz_code_dict['document_number'] = mrz_text[0][5:14] + if self._get_check_digit(mrz_code_dict['document_number']) != mrz_text[0][14]: return {'status': 'FAILURE', 'message': 'document number checksum is not matching'} - mrz_code_dict['optional_data_1'] = mrz_lines[0][15:].strip('<') + mrz_code_dict['optional_data_1'] = mrz_text[0][15:].strip('<') # Line 2 - mrz_code_dict['date_of_birth'] = mrz_lines[1][:6] - if self._get_check_digit(mrz_code_dict['date_of_birth']) != mrz_lines[1][6]: + mrz_code_dict['date_of_birth'] = mrz_text[1][:6] + if self._get_check_digit(mrz_code_dict['date_of_birth']) != mrz_text[1][6]: return {'status': 'FAILURE', 'message': 'date of birth checksum is not matching'} mrz_code_dict['date_of_birth'] = self._format_date(mrz_code_dict['date_of_birth']) - mrz_code_dict['sex'] = mrz_lines[1][7] - mrz_code_dict['date_of_expiry'] = mrz_lines[1][8:14] - if self._get_check_digit(mrz_code_dict['date_of_expiry']) != mrz_lines[1][14]: + mrz_code_dict['sex'] = mrz_text[1][7] + mrz_code_dict['date_of_expiry'] = mrz_text[1][8:14] + if self._get_check_digit(mrz_code_dict['date_of_expiry']) != mrz_text[1][14]: return {'status': 'FAILURE', 'message': 'date of expiry checksum is not matching'} mrz_code_dict['date_of_expiry'] = self._format_date(mrz_code_dict['date_of_expiry']) - mrz_code_dict['nationality'] = 
mrz_lines[1][15:18] - mrz_code_dict['optional_data_2'] = mrz_lines[0][18:29].strip('<') - if mrz_lines[1][-1] != self._get_final_check_digit(mrz_lines, mrz_code_dict['type']): + mrz_code_dict['nationality'] = mrz_text[1][15:18] + mrz_code_dict['optional_data_2'] = mrz_text[1][18:29].strip('<') + if mrz_text[1][-1] != self._get_final_check_digit(mrz_text, mrz_code_dict['mrz_type']): return {'status': 'FAILURE', 'message': 'final checksum is not matching'} # Line 3 - names = mrz_lines[2].split('<<') + names = mrz_text[2].split('<<') mrz_code_dict['surname'] = names[0].replace('<', ' ') mrz_code_dict['given_name'] = names[1].replace('<', ' ')