From a6c460cc3a0cf6770bb34484375f79436f42afaf Mon Sep 17 00:00:00 2001
From: bleeson
Date: Mon, 5 Aug 2024 08:32:47 -0700
Subject: [PATCH] Additional steps to clean data from SL.

---
 source_ecommerce_load_shipping.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/source_ecommerce_load_shipping.py b/source_ecommerce_load_shipping.py
index 57f64ea..63a69d1 100644
--- a/source_ecommerce_load_shipping.py
+++ b/source_ecommerce_load_shipping.py
@@ -35,6 +35,12 @@ ARCHIVE_DIRECTORY = THIS_DIRECTORY / "incoming_shipments" / "archive"
 
 def main():
     retrieve_x12_edi_files()#TODO uncomment
+    for file in INCOMING_DIRECTORY.iterdir():
+        if file.name[-4:] != '.csv':
+            continue
+        else:
+            clean_lines(file)
+            shutil.move(file, ARCHIVE_DIRECTORY / file.name)
     for file in INCOMING_DIRECTORY.iterdir():
         if file.name[-4:] != '.csv':
             continue
@@ -74,6 +80,24 @@ def retrieve_x12_edi_files():
             sftp_connection.rename(filename, new_filename)
 
 
+def clean_lines(file):
+    """
+    Fix lines that have newlines in their descriptions
+    """
+    with open(file, 'r', encoding='utf8') as source_file:
+        with open(INCOMING_DIRECTORY / f'clean_{file.name}', 'w', newline='', encoding='utf8') as output:
+            csv_reader = csv.reader(source_file)
+            csv_writer = csv.writer(output)
+            headers = next(csv_reader)
+            data = list(csv_reader)
+            for i, row in enumerate(data):
+                if len(row) < len(headers):
+                    next_line = data.pop(i + 1)
+                    csv_writer.writerow(row + next_line)
+                else:
+                    csv_writer.writerow(row)
+
+
 def process_files(file):
     with open(file, encoding='utf8') as source_file:
         with yamamotoyama.get_connection() as db_connection:
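
Note (not part of the patch): a minimal, standalone sketch of the row-merge idea in clean_lines, for review context. It assumes a CSV where a description field contained a newline, so one record arrives split across two physical rows with fewer fields than the header; the sample data, file contents, and column names below are illustrative only.

import csv
import io

# Illustrative input: the second record's description contains a newline,
# so it is split across two physical rows, each shorter than the header.
RAW = (
    "sku,description,qty\n"
    "A100,Green Tea,12\n"
    "B200,Sencha\n"
    " loose leaf,5\n"
)

reader = csv.reader(io.StringIO(RAW))
headers = next(reader)
data = list(reader)

merged = []
for i, row in enumerate(data):
    if len(row) < len(headers):
        # Short row: pull the continuation row forward and join the two,
        # mirroring the data.pop(i + 1) merge in clean_lines.
        next_line = data.pop(i + 1)
        merged.append(row + next_line)
    else:
        merged.append(row)

print(merged)
# [['A100', 'Green Tea', '12'], ['B200', 'Sencha', ' loose leaf', '5']]

Because the merge concatenates the two field lists rather than re-joining the split description, a repaired record carries one extra column (as in the sample output above); the hunks shown here don't indicate whether process_files rejoins or ignores that extra field.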