dataframe for encoding
This commit is contained in:
		
							parent
							
								
									cc6ff9325d
								
							
						
					
					
						commit
						a048b724ea
					
				| @ -20,22 +20,18 @@ print(f"Importing data from {csv_file}...") | ||||
| import_timestamp = datetime.datetime.now() | ||||
| print(f"Import timestamp: {import_timestamp}") | ||||
| 
 | ||||
| # First read the CSV file with pandas to handle encoding | ||||
| print("Reading CSV file with proper encoding...") | ||||
| df = pd.read_csv(csv_file, sep=';', encoding='ISO-8859-1', decimal=',') | ||||
| 
 | ||||
| # Create temporary table with CSV data, import timestamp, and hash | ||||
| con.execute(""" | ||||
|     CREATE TEMP TABLE IF NOT EXISTS temp_working_times AS | ||||
|     CREATE TEMP TABLE temp_working_times AS | ||||
|     WITH base_data AS ( | ||||
|         SELECT  | ||||
|             *, | ||||
|             '{timestamp}'::TIMESTAMP AS import_timestamp | ||||
|         FROM read_csv_auto( | ||||
|             '{csv_file}',  | ||||
|             delim=';', | ||||
|             header=true, | ||||
|             ignore_errors=true, | ||||
|             sample_size=1000, | ||||
|             auto_detect=true, | ||||
|             decimal_separator=',' | ||||
|         ) | ||||
|         FROM df | ||||
|     ) | ||||
|     SELECT  | ||||
|         *, | ||||
| @ -65,7 +61,7 @@ con.execute(""" | ||||
|             "Kommentar" | ||||
|         ) AS row_hash | ||||
|     FROM base_data | ||||
| """.format(csv_file=csv_file, timestamp=import_timestamp)) | ||||
| """.format(timestamp=import_timestamp)) | ||||
| 
 | ||||
| # Create the working_times table if it doesn't exist | ||||
| con.execute(""" | ||||
| @ -85,7 +81,7 @@ con.execute(""" | ||||
|     SELECT DISTINCT "Datum" FROM temp_working_times | ||||
| """) | ||||
| 
 | ||||
| # Get all unique dates from existing data, ignore deleted entries | ||||
| # Get all unique dates from existing data | ||||
| con.execute(""" | ||||
|     CREATE TEMP TABLE temp_existing_dates AS | ||||
|     SELECT DISTINCT "Datum" FROM working_times | ||||
| @ -107,16 +103,17 @@ con.execute(""" | ||||
|     FROM temp_working_times t | ||||
|     JOIN working_times e ON t."Datum" = e."Datum" | ||||
|     WHERE NOT EXISTS ( | ||||
|         SELECT 1 FROM working_times e2 | ||||
|         WHERE e2."Datum" = t."Datum" | ||||
|         AND e2.row_hash = t.row_hash | ||||
|         SELECT 1  | ||||
|         FROM working_times | ||||
|         WHERE "Datum" = t."Datum" | ||||
|         AND row_hash = t.row_hash | ||||
|     ) | ||||
| """) | ||||
| 
 | ||||
| # Insert new data for new and changed dates | ||||
| con.execute(""" | ||||
|     INSERT INTO working_times | ||||
|     SELECT * FROM temp_working_times | ||||
|     SELECT *, NULL as delete FROM temp_working_times | ||||
|     WHERE "Datum" IN ( | ||||
|         SELECT "Datum" FROM temp_new_dates | ||||
|         UNION | ||||
| @ -124,9 +121,6 @@ con.execute(""" | ||||
|     ) | ||||
| """) | ||||
| 
 | ||||
| # Set delete flag | ||||
| # TODO | ||||
| 
 | ||||
| # Verify the data was imported | ||||
| count = con.execute("SELECT COUNT(*) FROM working_times").fetchone()[0] | ||||
| print(f"Successfully imported data. Total records in database: {count}") | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user