From a6c460cc3a0cf6770bb34484375f79436f42afaf Mon Sep 17 00:00:00 2001
From: bleeson
Date: Mon, 5 Aug 2024 08:32:47 -0700
Subject: [PATCH] Additional steps to clean data from SL.

---
 source_ecommerce_load_shipping.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/source_ecommerce_load_shipping.py b/source_ecommerce_load_shipping.py
index 57f64ea..63a69d1 100644
--- a/source_ecommerce_load_shipping.py
+++ b/source_ecommerce_load_shipping.py
@@ -35,6 +35,12 @@ ARCHIVE_DIRECTORY = THIS_DIRECTORY / "incoming_shipments" / "archive"
 
 def main():
     retrieve_x12_edi_files()#TODO uncomment
+    for file in INCOMING_DIRECTORY.iterdir():
+        if file.name[-4:] != '.csv':
+            continue
+        else:
+            clean_lines(file)
+            shutil.move(file, ARCHIVE_DIRECTORY / file.name)
     for file in INCOMING_DIRECTORY.iterdir():
         if file.name[-4:] != '.csv':
             continue
@@ -74,6 +80,24 @@ def retrieve_x12_edi_files():
             sftp_connection.rename(filename, new_filename)
 
 
+def clean_lines(file):
+    """
+    Fix lines that have newlines in their descriptions
+    """
+    with open(file, 'r', encoding='utf8') as source_file:
+        with open(INCOMING_DIRECTORY / f'clean_{file.name}', 'w', newline='', encoding='utf8') as output:
+            csv_reader = csv.reader(source_file)
+            csv_writer = csv.writer(output)
+            headers = next(csv_reader)
+            data = list(csv_reader)
+            for i, row in enumerate(data):
+                if len(row) < len(headers):
+                    next_line = data.pop(i + 1)
+                    csv_writer.writerow(row + next_line)
+                else:
+                    csv_writer.writerow(row)
+
+
 def process_files(file):
     with open(file, encoding='utf8') as source_file:
         with yamamotoyama.get_connection() as db_connection:
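
Note (not part of the patch): a minimal, standalone sketch of the row-merge idea in clean_lines, for review context. It assumes a CSV where a description field contained a newline, so one record arrives split across two physical rows with fewer fields than the header; the sample data, file contents, and column names below are illustrative only.

import csv
import io

# Illustrative input: the second record's description contains a newline,
# so it is split across two physical rows, each shorter than the header.
RAW = (
    "sku,description,qty\n"
    "A100,Green Tea,12\n"
    "B200,Sencha\n"
    " loose leaf,5\n"
)

reader = csv.reader(io.StringIO(RAW))
headers = next(reader)
data = list(reader)

merged = []
for i, row in enumerate(data):
    if len(row) < len(headers):
        # Short row: pull the continuation row forward and join the two,
        # mirroring the data.pop(i + 1) merge in clean_lines.
        next_line = data.pop(i + 1)
        merged.append(row + next_line)
    else:
        merged.append(row)

print(merged)
# [['A100', 'Green Tea', '12'], ['B200', 'Sencha', ' loose leaf', '5']]

Because the merge concatenates the two field lists rather than re-joining the split description, a repaired record carries one extra column (as in the sample output above); the hunks shown here don't indicate whether process_files rejoins or ignores that extra field.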