Spaces:
Running
Running
Updated create_csv to loop over the number of items
Browse files
app.py
CHANGED
|
@@ -260,13 +260,13 @@ def create_csv():
|
|
| 260 |
'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
|
| 261 |
'PRICE', 'TOTAL', 'VATTAX'
|
| 262 |
]
|
| 263 |
-
|
| 264 |
# Iterate through JSON files in the folder
|
| 265 |
for filename in os.listdir(json_folder_path):
|
| 266 |
if filename.endswith(".json"):
|
| 267 |
json_file_path = os.path.join(json_folder_path, filename)
|
| 268 |
|
| 269 |
-
with open(json_file_path, 'r') as file:
|
| 270 |
data = json.load(file)
|
| 271 |
all_data = data.get('output', [])
|
| 272 |
|
|
@@ -277,21 +277,29 @@ def create_csv():
|
|
| 277 |
text = item['text'].replace('|', '') # Strip the pipe character
|
| 278 |
if label == 'VATTAX' or label == 'TOTAL':
|
| 279 |
text = replace_symbols_with_period(text.replace(' ', '')) # Remove spaces and replace symbols with periods
|
| 280 |
-
|
| 281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
else:
|
| 283 |
-
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
# Writing data to CSV file with ordered columns
|
| 286 |
csv_file_path = os.path.join(output_folder_path, os.path.splitext(filename)[0] + '.csv')
|
| 287 |
-
with open(csv_file_path, 'w',
|
| 288 |
csv_writer = csv.DictWriter(csvfile, fieldnames=column_order, delimiter=",")
|
| 289 |
if os.path.getsize(csv_file_path) == 0:
|
| 290 |
csv_writer.writeheader()
|
| 291 |
|
| 292 |
# Constructing rows for the CSV file
|
| 293 |
-
|
| 294 |
-
for i in range(
|
| 295 |
row_data = {}
|
| 296 |
for label in column_order:
|
| 297 |
if label in label_texts: # Check if the label exists in the dictionary
|
|
@@ -306,9 +314,9 @@ def create_csv():
|
|
| 306 |
row_data[label] = '' # If the label does not exist, set the value to an empty string
|
| 307 |
csv_writer.writerow(row_data)
|
| 308 |
|
| 309 |
-
|
| 310 |
output_file_path = r"static/temp/inferenced/output.csv"
|
| 311 |
-
with open(output_file_path, 'w', newline='') as combined_csvfile:
|
| 312 |
combined_csv_writer = csv.DictWriter(combined_csvfile, fieldnames=column_order, delimiter=",")
|
| 313 |
combined_csv_writer.writeheader()
|
| 314 |
|
|
@@ -318,7 +326,7 @@ def create_csv():
|
|
| 318 |
csv_file_path = os.path.join(output_folder_path, csv_filename)
|
| 319 |
|
| 320 |
# Read data from CSV file and write to the combined CSV file
|
| 321 |
-
with open(csv_file_path, 'r') as csv_file:
|
| 322 |
csv_reader = csv.DictReader(csv_file)
|
| 323 |
for row in csv_reader:
|
| 324 |
combined_csv_writer.writerow(row)
|
|
|
|
| 260 |
'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
|
| 261 |
'PRICE', 'TOTAL', 'VATTAX'
|
| 262 |
]
|
| 263 |
+
# Save
|
| 264 |
# Iterate through JSON files in the folder
|
| 265 |
for filename in os.listdir(json_folder_path):
|
| 266 |
if filename.endswith(".json"):
|
| 267 |
json_file_path = os.path.join(json_folder_path, filename)
|
| 268 |
|
| 269 |
+
with open(json_file_path, 'r', encoding='utf-8') as file:
|
| 270 |
data = json.load(file)
|
| 271 |
all_data = data.get('output', [])
|
| 272 |
|
|
|
|
| 277 |
text = item['text'].replace('|', '') # Strip the pipe character
|
| 278 |
if label == 'VATTAX' or label == 'TOTAL':
|
| 279 |
text = replace_symbols_with_period(text.replace(' ', '')) # Remove spaces and replace symbols with periods
|
| 280 |
+
|
| 281 |
+
if label == 'TRANSACTIONTIME':
|
| 282 |
+
# Concatenate all words for 'TRANSACTIONTIME' labels
|
| 283 |
+
if label in label_texts:
|
| 284 |
+
label_texts[label][0] += ": " + text # Add a colon and a space before the text
|
| 285 |
+
else:
|
| 286 |
+
label_texts[label] = [text]
|
| 287 |
else:
|
| 288 |
+
if label in label_texts:
|
| 289 |
+
label_texts[label].append(text)
|
| 290 |
+
else:
|
| 291 |
+
label_texts[label] = [text]
|
| 292 |
|
| 293 |
# Writing data to CSV file with ordered columns
|
| 294 |
csv_file_path = os.path.join(output_folder_path, os.path.splitext(filename)[0] + '.csv')
|
| 295 |
+
with open(csv_file_path, 'w', encoding='utf-8') as csvfile:
|
| 296 |
csv_writer = csv.DictWriter(csvfile, fieldnames=column_order, delimiter=",")
|
| 297 |
if os.path.getsize(csv_file_path) == 0:
|
| 298 |
csv_writer.writeheader()
|
| 299 |
|
| 300 |
# Constructing rows for the CSV file
|
| 301 |
+
num_items = len(label_texts.get('ITEMS', []))
|
| 302 |
+
for i in range(num_items):
|
| 303 |
row_data = {}
|
| 304 |
for label in column_order:
|
| 305 |
if label in label_texts: # Check if the label exists in the dictionary
|
|
|
|
| 314 |
row_data[label] = '' # If the label does not exist, set the value to an empty string
|
| 315 |
csv_writer.writerow(row_data)
|
| 316 |
|
| 317 |
+
# Combining contents of CSV files into a single CSV file
|
| 318 |
output_file_path = r"static/temp/inferenced/output.csv"
|
| 319 |
+
with open(output_file_path, 'w', newline='', encoding='utf-8') as combined_csvfile:
|
| 320 |
combined_csv_writer = csv.DictWriter(combined_csvfile, fieldnames=column_order, delimiter=",")
|
| 321 |
combined_csv_writer.writeheader()
|
| 322 |
|
|
|
|
| 326 |
csv_file_path = os.path.join(output_folder_path, csv_filename)
|
| 327 |
|
| 328 |
# Read data from CSV file and write to the combined CSV file
|
| 329 |
+
with open(csv_file_path, 'r', encoding='utf-8') as csv_file:
|
| 330 |
csv_reader = csv.DictReader(csv_file)
|
| 331 |
for row in csv_reader:
|
| 332 |
combined_csv_writer.writerow(row)
|