Additional steps to clean data from SL.

2024-08-05 08:32:47 -07:00 · 2024-08-05 08:32:47 -07:00 · a6c460cc3a
parent 9f22df7940
commit a6c460cc3a
1 changed files with 24 additions and 0 deletions
--- a/source_ecommerce_load_shipping.py
+++ b/source_ecommerce_load_shipping.py
@ -35,6 +35,12 @@ ARCHIVE_DIRECTORY = THIS_DIRECTORY / "incoming_shipments" / "archive"

 def main():
    retrieve_x12_edi_files()#TODO uncomment
+    for file in INCOMING_DIRECTORY.iterdir():
+        if file.name[-4:] != '.csv':
+            continue
+        else:
+            clean_lines(file)
+            shutil.move(file, ARCHIVE_DIRECTORY / file.name)
    for file in INCOMING_DIRECTORY.iterdir():
        if file.name[-4:] != '.csv':
            continue
@ -74,6 +80,24 @@ def retrieve_x12_edi_files():
                sftp_connection.rename(filename, new_filename)


+def clean_lines(file):
+    """
+    Fix lines that have newlines in their descriptions.
+
+    Reads ``file`` as CSV and writes the result to ``clean_<file.name>`` in
+    INCOMING_DIRECTORY. A row with fewer fields than the header row is
+    treated as the first part of a record broken by a stray newline and is
+    joined with the row that follows it.
+
+    A short row at the very end of the file has nothing to be joined with;
+    it is written as-is (or dropped if it is completely empty) instead of
+    raising IndexError. An empty source file produces an empty output file.
+
+    NOTE(review): the header row is consumed but not written to the output
+    file -- confirm that whatever reads the clean file expects that.
+
+    :param file: path object of the CSV file to clean (``file.name`` is used
+        to build the output file name).
+    """
+    with open(file, 'r', encoding='utf8') as source_file:
+        with open(INCOMING_DIRECTORY / f'clean_{file.name}', 'w',newline='',encoding='utf8') as output:
+            csv_reader = csv.reader(source_file)
+            csv_writer = csv.writer(output)
+            # An empty file has no header row, so there is nothing to clean.
+            headers = next(csv_reader, None)
+            if headers is None:
+                return
+            for row in csv_reader:
+                if len(row) < len(headers):
+                    # Take the continuation directly from the reader rather
+                    # than popping from a list that is being iterated; the
+                    # default covers a short row with no row after it.
+                    row = row + next(csv_reader, [])
+                # A blank trailing line yields an empty row; skip it.
+                if row:
+                    csv_writer.writerow(row)
+
+
 def process_files(file):
    with open(file, encoding='utf8') as source_file:
        with yamamotoyama.get_connection() as db_connection: