added error trapping when no data was received
Files changed:
- Layoutlmv3_inference/ocr.py (+223 -1)
- app.py (+27 -42)
- templates/extractor.html (+51 -24)
Layoutlmv3_inference/ocr.py
CHANGED

@@ -72,7 +72,229 @@ def enhance_txt(img, intensity_increase=20, bilateral_filter_diameter=9, bilateral_filter_sigma_color=75, bilateral_filter_sigma_space=75):
     img = np.clip(img + intensity_increase, 0, 255).astype(np.uint8)

     # Apply bilateral filter to reduce noise
-    img = cv2.bilateralFilter(img, bilateral_filter_diameter, bilateral_filter_sigma_color, bilateral_filter_sigma_space)
+    img = cv2.bilateralFilter(img, bilateral_filter_diameter,
+                              bilateral_filter_sigma_color, bilateral_filter_sigma_space)
+
+
+import os
+import pandas as pd
+import cv2
+import numpy as np
+import json
+import requests
+import traceback
+import tempfile
+from rembg import remove
+
+
+from PIL import Image
+
+
+def preprocess_image(image_path, max_file_size_mb=1, target_file_size_mb=0.5):
+    try:
+        # Read the image
+        image = cv2.imread(image_path)
+        # Enhance text
+        enhanced = enhance_txt(image)
+
+        # Save the enhanced image to a temporary file
+        temp_file_path = tempfile.NamedTemporaryFile(suffix='.jpg').name
+        cv2.imwrite(temp_file_path, enhanced)
+
+        # Check the file size of the temporary file (convert to megabytes)
+        file_size_mb = os.path.getsize(temp_file_path) / (1024 * 1024)
+
+        while file_size_mb > max_file_size_mb:
+            print(f"File size ({file_size_mb} MB) exceeds the maximum allowed size ({max_file_size_mb} MB). Resizing the image.")
+            ratio = np.sqrt(target_file_size_mb / file_size_mb)
+            new_width = int(image.shape[1] * ratio)
+            new_height = int(image.shape[0] * ratio)
+
+            # Resize the image
+            enhanced = cv2.resize(enhanced, (new_width, new_height))
+
+            # Save the resized image to a temporary file
+            temp_file_path = tempfile.NamedTemporaryFile(suffix='.jpg').name
+            cv2.imwrite(temp_file_path, enhanced)
+
+            # Update the file size
+            file_size_mb = os.path.getsize(temp_file_path) / (1024 * 1024)
+            print(f"New file size: ({file_size_mb} MB)")
+
+        # Return the final resized image
+        image_resized = cv2.imread(temp_file_path)
+        return image_resized
+
+    except Exception as e:
+        print(f"An error occurred in preprocess_image: {str(e)}")
+        return None
+
+
+def enhance_txt(img, intensity_increase=20, bilateral_filter_diameter=9, bilateral_filter_sigma_color=75, bilateral_filter_sigma_space=75):
+    # Get the width and height of the image
+    w = img.shape[1]
+    h = img.shape[0]
+    w1 = int(w * 0.05)
+    w2 = int(w * 0.95)
+    h1 = int(h * 0.05)
+    h2 = int(h * 0.95)
+    ROI = img[h1:h2, w1:w2]  # central region of the image
+    threshold = np.mean(ROI) * 0.88  # % of average brightness
+
+    # Convert the image to grayscale
+    grayscale_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    # Find contours
+    contours, _ = cv2.findContours(
+        grayscale_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    # Apply Gaussian blur
+    blurred = cv2.GaussianBlur(grayscale_img, (1, 1), 0)
+
+    edged = 255 - cv2.Canny(blurred, 100, 150, apertureSize=7)
+
+    # Increase intensity by adding a constant value
+    img = np.clip(img + intensity_increase, 0, 255).astype(np.uint8)
+
+    # Apply bilateral filter to reduce noise
+    img = cv2.bilateralFilter(img, bilateral_filter_diameter,
+                              bilateral_filter_sigma_color, bilateral_filter_sigma_space)
+
+    _, binary = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)
+    return binary
+
+
+def run_tesseract_on_preprocessed_image(preprocessed_image, image_path):
+    image_name = os.path.basename(image_path)
+    image_name = image_name[:image_name.find('.')]
+
+    # Create the "temp" folder if it doesn't exist
+    temp_folder = "static/temp"
+    if not os.path.exists(temp_folder):
+        os.makedirs(temp_folder)
+
+    # Define the OCR API endpoint
+    url = "https://api.ocr.space/parse/image"
+
+    # Define the API key and the language
+    api_key = os.getenv("ocr_space")
+    language = "eng"
+
+    # Save the preprocessed image
+    cv2.imwrite(os.path.join(
+        temp_folder, f"{image_name}_preprocessed.jpg"), preprocessed_image)
+
+    # Open the preprocessed image file as binary
+    with open(os.path.join(temp_folder, f"{image_name}_preprocessed.jpg"), "rb") as f:
+        # Define the payload for the API request
+        payload = {
+            "apikey": api_key,
+            "language": language,
+            "isOverlayRequired": True,
+            "OCREngine": 2
+        }
+        # Define the file parameter for the API request
+        file = {
+            "file": f
+        }
+        # Send the POST request to the OCR API
+        response = requests.post(url, data=payload, files=file)
+
+    # Check the status code of the response
+    if response.status_code == 200:
+        # Parse the JSON response
+        result = response.json()
+        print("---JSON file saved")
+        # Save the OCR result as JSON
+        with open(os.path.join(temp_folder, f"{image_name}_ocr.json"), 'w') as f:
+            json.dump(result, f)
+
+        return os.path.join(temp_folder, f"{image_name}_ocr.json")
+    else:
+        raise Exception("An error occurred: " + response.text)
+
+
+def clean_tesseract_output(json_output_path):
+    try:
+        with open(json_output_path, 'r') as json_file:
+            data = json.load(json_file)
+
+        lines = data['ParsedResults'][0]['TextOverlay']['Lines']
+
+        words = []
+        for line in lines:
+            for word_info in line['Words']:
+                word = {}
+                origin_box = [
+                    word_info['Left'],
+                    word_info['Top'],
+                    word_info['Left'] + word_info['Width'],
+                    word_info['Top'] + word_info['Height']
+                ]
+
+                word['word_text'] = word_info['WordText']
+                word['word_box'] = origin_box
+                words.append(word)
+
+        return words
+    except (KeyError, IndexError, FileNotFoundError, json.JSONDecodeError) as e:
+        print("Check your Internet Connection.")
+        print(f"Error cleaning Tesseract output: {str(e)}")
+        return None
+
+
+def prepare_batch_for_inference(image_paths):
+    # print("my_function was called")
+    # traceback.print_stack()  # This will print the stack trace
+    # Print the total number of images to be processed
+    print(f"Number of images to process: {len(image_paths)}")
+    print("1. Preparing for Inference")
+    tsv_output_paths = []
+
+    inference_batch = dict()
+    print("2. Starting Preprocessing")
+    # Ensure that the image is only 1
+    for image_path in image_paths:
+        # Print the image being processed
+        print(f"Processing the image: {image_path}")
+        print("3. Preprocessing the Receipt")
+        preprocessed_image = preprocess_image(image_path)
+        if preprocessed_image is not None:
+            try:
+                print("4. Preprocessing done. Running OCR")
+                try:
+                    json_output_path = run_tesseract_on_preprocessed_image(
+                        preprocessed_image, image_path)
+                except Exception as e:
+                    print(f"An error has occurred: {str(e)}")
+                    raise e
+                print("5. OCR Complete")
+            except Exception as e:
+                print(f"An error has occurred: {str(e)}")
+                raise e
+            if json_output_path:
+                tsv_output_paths.append(json_output_path)
+
+    print("6. Preprocessing and OCR Done")
+    # clean_outputs is a list of lists
+    clean_outputs = [clean_tesseract_output(
+        tsv_path) for tsv_path in tsv_output_paths]
+    print("7. Cleaned OCR output")
+    word_lists = [[word['word_text'] for word in clean_output]
+                  for clean_output in clean_outputs]
+    print("8. Word List Created")
+    boxes_lists = [[word['word_box'] for word in clean_output]
+                   for clean_output in clean_outputs]
+    print("9. Box List Created")
+    inference_batch = {
+        "image_path": image_paths,
+        "bboxes": boxes_lists,
+        "words": word_lists
+    }
+
+    print("10. Prepared for Inference Batch")
+    return inference_batch

     _, binary = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)
     return binary
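The error trapping in this module is spread across the stages: preprocess_image returns None on failure, run_tesseract_on_preprocessed_image raises when the OCR.space request does not come back with a 200, and clean_tesseract_output returns None when the JSON has no parsed text. Below is a minimal caller sketch that threads those cases together, assuming the module layout above; safe_words_for_image and sample.jpg are hypothetical names, not part of the commit.

# Hypothetical caller sketch; not part of the committed module.
from Layoutlmv3_inference.ocr import (
    preprocess_image,
    run_tesseract_on_preprocessed_image,
    clean_tesseract_output,
)

def safe_words_for_image(image_path):
    """Return the cleaned word list for one receipt, or None if any stage produced no data."""
    preprocessed = preprocess_image(image_path)  # returns None on failure
    if preprocessed is None:
        return None
    try:
        json_path = run_tesseract_on_preprocessed_image(preprocessed, image_path)
    except Exception as exc:  # raised when the OCR API call fails (e.g. no connection)
        print(f"OCR request failed: {exc}")
        return None
    return clean_tesseract_output(json_path)  # None when the response has no parsed text

if __name__ == "__main__":
    print(safe_words_for_image("sample.jpg"))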
app.py
CHANGED

@@ -24,7 +24,6 @@ import signal
 import shutil
 from datetime import datetime
 import zipfile
-from pathlib import Path

 # LLM
 import argparse

@@ -34,10 +33,17 @@ from Layoutlmv3_inference.inference_handler import handle
 import logging
 import os
 import copy
+import warnings
+warnings.filterwarnings("ignore", category=UserWarning, module='torch.serialization', lineno=1113)
+warnings.filterwarnings("ignore")
+from torch.serialization import SourceChangeWarning
+
+warnings.filterwarnings("ignore", category=FutureWarning)
+warnings.filterwarnings("ignore", category=SourceChangeWarning)


 # Upload Folder
-UPLOAD_FOLDER =
+UPLOAD_FOLDER = 'static/temp/uploads'
 if not os.path.exists(UPLOAD_FOLDER):
     os.makedirs(UPLOAD_FOLDER)

@@ -83,7 +89,7 @@ def allowed_file(filename):

 @app.route('/upload', methods=['GET', 'POST'])
 def upload_files():
-    UPLOAD_FOLDER =
+    UPLOAD_FOLDER = 'static/temp/uploads'
     if not os.path.exists(UPLOAD_FOLDER):
         os.makedirs(UPLOAD_FOLDER)
     if request.method == 'POST':

@@ -102,13 +108,12 @@ def upload_files():
     return render_template('index.html')


-from pathlib import Path
 def make_predictions(image_paths):
-
+    temp = None
     try:
-        #
-
-
+        # For Windows OS
+        temp = pathlib.PosixPath  # Save the original state
+        pathlib.PosixPath = pathlib.WindowsPath  # Change to WindowsPath temporarily

         model_path = Path(r'model/export')
         learner = load_learner(model_path)

@@ -126,16 +131,14 @@ def make_predictions(image_paths):
         predicted_class_str = str(prediction_class)

         predictions.append(predicted_class_str)
-
-        print(f"Prediction: {predictions}")

         return predictions

     except Exception as e:
         return {"error in make_predictions": str(e)}

-
-
+    finally:
+        pathlib.PosixPath = temp


 @app.route('/predict/<filenames>', methods=['GET', 'POST'])

@@ -168,12 +171,8 @@ def predict_files(filenames):

         if os.path.exists(file_path):
             # Call make_predictions automatically
-            prediction_result = make_predictions([file_path])
-
-            prediction_results.append(prediction_result[0])  # Append only the first prediction result
-        else:
-            print(f"Error making prediction for {file}: {prediction_result}")
-
+            prediction_result = make_predictions([file_path])  # Pass file_path as a list
+            prediction_results.append(prediction_result[0])  # Append only the first prediction result
     prediction_results_copy = copy.deepcopy(prediction_results)

     non_receipt_indices = []

@@ -187,15 +186,16 @@ def predict_files(filenames):
         if os.path.exists(file_to_remove):
             os.remove(file_to_remove)

+
     return render_template('extractor.html', index_url=index_url, image_paths=image_paths, prediction_results = prediction_results, predictions=dict(zip(image_paths, prediction_results_copy)))



-
-
-#
-
-
+@app.route('/get_inference_image')
+def get_inference_image():
+    # Assuming the new image is stored in the 'inferenced' folder with the name 'temp_inference.jpg'
+    inferenced_image = 'static/temp/inferenced/temp_inference.jpg'
+    return jsonify(updatedImagePath=inferenced_image), 200  # Return the image path with a 200 status code


 def process_images(model_path: str, images_path: str) -> None:

@@ -205,12 +205,14 @@ def process_images(model_path: str, images_path: str) -> None:
         inference_batch = prepare_batch_for_inference(images_path)
         context = {"model_dir": model_path}
         handle(inference_batch, context)
-    except Exception as
+    except Exception as e:
+        print("No Internet connection.")
         os.makedirs('log', exist_ok=True)
         logging.basicConfig(filename='log/error_output.log', level=logging.ERROR,
                             format='%(asctime)s %(levelname)s %(name)s %(message)s')
         logger = logging.getLogger(__name__)
         logger.error(err)
+        return redirect(url_for('index'))

 @app.route('/run_inference', methods=['GET'])
 def run_inference():

@@ -335,24 +337,8 @@ def create_csv():

     except Exception as e:
         print(f"An error occurred in create_csv: {str(e)}")
-        return
+        return render_template('extractor.html', error_message=str(e))

-    except Exception as e:
-        print(f"An error occurred in create_csv: {str(e)}")
-        return None
-
-    except FileNotFoundError as e:
-        print(f"File not found error: {str(e)}")
-        return jsonify({'error': 'File not found.'}), 404
-    except json.JSONDecodeError as e:
-        print(f"JSON decoding error: {str(e)}")
-        return jsonify({'error': 'JSON decoding error.'}), 500
-    except csv.Error as e:
-        print(f"CSV error: {str(e)}")
-        return jsonify({'error': 'CSV error.'}), 500
-    except Exception as e:
-        print(f"An unexpected error occurred: {str(e)}")
-        return jsonify({'error': 'An unexpected error occurred.'}), 500

 @app.route('/get_data')
 def get_data():

@@ -372,6 +358,5 @@ def download_csv():
     return jsonify({"error": f"Download failed: {str(e)}"})


-

 if __name__ == '__main__':
     app.run(debug=True)
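The new make_predictions body saves pathlib.PosixPath, points it at pathlib.WindowsPath before load_learner, and restores it in the finally block. That is the usual workaround for loading a fastai learner exported on Linux onto a Windows host, where the pickled paths are PosixPath objects. Below is a minimal sketch of the same save-and-restore pattern in isolation, assuming a Windows host; the context-manager form is an illustration, not the committed code.

import pathlib
from contextlib import contextmanager

@contextmanager
def windows_path_patch():
    """Temporarily alias PosixPath to WindowsPath so pickled POSIX paths unpickle on Windows."""
    saved = pathlib.PosixPath
    pathlib.PosixPath = pathlib.WindowsPath
    try:
        yield
    finally:
        pathlib.PosixPath = saved  # always restore, mirroring the try/finally in make_predictions

# Usage (assumes fastai is installed and the export exists):
# from fastai.vision.all import load_learner
# with windows_path_patch():
#     learner = load_learner(pathlib.Path("model/export"))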
templates/extractor.html
CHANGED

@@ -223,6 +223,10 @@


     function updateTable(data) {
+        if (!data || data.trim() === '') {
+            return 'Error extracting the information: data is empty';
+        }
+
         Papa.parse(data, {
             header: true,
             skipEmptyLines: true,

@@ -230,37 +234,60 @@
             const tbody = document.querySelector('#dataTable tbody');
             tbody.innerHTML = ''; // Clear existing rows

-            results.data.forEach(row => {
-                const RECEIPTNUMBER = row['RECEIPTNUMBER'] || '';
-                const MERCHANTNAME = row['MERCHANTNAME'] || '';
-                const MERCHANTADDRESS = row['MERCHANTADDRESS'] || '';
-                const TRANSACTIONDATE = row['TRANSACTIONDATE'] || '';
-                const TRANSACTIONTIME = row['TRANSACTIONTIME'] || '';
-                const ITEMS = row['ITEMS'] || '';
-                const PRICE = row['PRICE'] || '';
-                const TOTAL = row['TOTAL'] || '';
-                const VATTAX = row['VATTAX'] || '';
-
+            if (results.data.length === 0) {
                 const tr = document.createElement('tr');
+                const td = document.createElement('td');
+                td.colSpan = 9;
+                td.style.textAlign = 'center'; // Center the content
+                td.style.fontSize = '1.2em'; // Increase the font size
+                td.style.height = '100px'; // Set the height of the row
+                td.style.verticalAlign = 'middle'; // Vertically center the content
+
+                const icon = document.createElement('i');
+                icon.className = 'fas fa-exclamation-triangle'; // Replace with the class of your error icon
+                td.appendChild(icon);
+
+                const br = document.createElement('br'); // Create a line break element
+                td.appendChild(br); // Append the line break element
+
+                const text = document.createTextNode(' An error occurred in extracting the data. Please contact the developers.');
+                td.appendChild(text);
+
+                tr.appendChild(td);
                 tbody.appendChild(tr);
-            }
+            } else {
+                results.data.forEach(row => {
+                    const RECEIPTNUMBER = row['RECEIPTNUMBER'] || '';
+                    const MERCHANTNAME = row['MERCHANTNAME'] || '';
+                    const MERCHANTADDRESS = row['MERCHANTADDRESS'] || '';
+                    const TRANSACTIONDATE = row['TRANSACTIONDATE'] || '';
+                    const TRANSACTIONTIME = row['TRANSACTIONTIME'] || '';
+                    const ITEMS = row['ITEMS'] || '';
+                    const PRICE = row['PRICE'] || '';
+                    const TOTAL = row['TOTAL'] || '';
+                    const VATTAX = row['VATTAX'] || '';
+
+                    const tr = document.createElement('tr');
+                    tr.innerHTML = `
+                        <td contenteditable="true">${RECEIPTNUMBER}</td>
+                        <td contenteditable="true">${MERCHANTNAME}</td>
+                        <td contenteditable="true">${MERCHANTADDRESS}</td>
+                        <td contenteditable="true">${TRANSACTIONDATE}</td>
+                        <td contenteditable="true">${TRANSACTIONTIME}</td>
+                        <td contenteditable="true">${ITEMS}</td>
+                        <td contenteditable="true">${PRICE}</td>
+                        <td contenteditable="true">${TOTAL}</td>
+                        <td contenteditable="true">${VATTAX}</td>
+                    `;
+                    tbody.appendChild(tr);
+                });
+            }
             }
         });
     }
-
 });

+
     document.querySelector('#downloadButton').addEventListener('click', function (event) {
         event.preventDefault(); // Prevent the default action

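The guard added to updateTable covers two shapes of "no data": an empty response body and a parse result with zero rows. For reference, here is a hedged Python sketch of the CSV payload shape the table consumes, built from the nine column names used in the template; this illustrates only the data shape, not the app's actual create_csv logic, and rows_to_csv is a hypothetical helper.

import csv
import io

FIELDS = ["RECEIPTNUMBER", "MERCHANTNAME", "MERCHANTADDRESS", "TRANSACTIONDATE",
          "TRANSACTIONTIME", "ITEMS", "PRICE", "TOTAL", "VATTAX"]

def rows_to_csv(rows):
    """Serialize extracted rows to the CSV text Papa.parse consumes; header-only when rows is empty."""
    buf = io.StringIO()
    writer = csv.DictWriter(buf, fieldnames=FIELDS)
    writer.writeheader()
    for row in rows:
        writer.writerow({field: row.get(field, "") for field in FIELDS})
    return buf.getvalue()

# A header-only payload parses to results.data.length === 0,
# which now renders the centered error row instead of a blank table.
print(rows_to_csv([]))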