diff --git a/import_data.py b/import_data.py
index ad4d6cd..70d4b07 100644
--- a/import_data.py
+++ b/import_data.py
@@ -20,22 +20,18 @@ print(f"Importing data from {csv_file}...")
 import_timestamp = datetime.datetime.now()
 print(f"Import timestamp: {import_timestamp}")
 
+# First read the CSV file with pandas to handle encoding
+print("Reading CSV file with proper encoding...")
+df = pd.read_csv(csv_file, sep=';', encoding='ISO-8859-1', decimal=',')
+
 # Create temporary table with CSV data, import timestamp, and hash
 con.execute("""
-    CREATE TEMP TABLE IF NOT EXISTS temp_working_times AS
+    CREATE TEMP TABLE temp_working_times AS
     WITH base_data AS (
         SELECT
             *,
             '{timestamp}'::TIMESTAMP AS import_timestamp
-        FROM read_csv_auto(
-            '{csv_file}',
-            delim=';',
-            header=true,
-            ignore_errors=true,
-            sample_size=1000,
-            auto_detect=true,
-            decimal_separator=','
-        )
+        FROM df
     )
     SELECT
         *,
@@ -65,7 +61,7 @@ con.execute("""
             "Kommentar"
         ) AS row_hash
     FROM base_data
-""".format(csv_file=csv_file, timestamp=import_timestamp))
+""".format(timestamp=import_timestamp))
 
 # Create the working_times table if it doesn't exist
 con.execute("""
@@ -85,7 +81,7 @@ con.execute("""
     SELECT DISTINCT "Datum" FROM temp_working_times
 """)
 
-# Get all unique dates from existing data, ignore deleted entries
+# Get all unique dates from existing data
 con.execute("""
     CREATE TEMP TABLE temp_existing_dates AS
     SELECT DISTINCT "Datum" FROM working_times
@@ -107,16 +103,17 @@ con.execute("""
     FROM temp_working_times t
     JOIN working_times e ON t."Datum" = e."Datum"
     WHERE NOT EXISTS (
-        SELECT 1 FROM working_times e2
-        WHERE e2."Datum" = t."Datum"
-        AND e2.row_hash = t.row_hash
+        SELECT 1
+        FROM working_times
+        WHERE "Datum" = t."Datum"
+        AND row_hash = t.row_hash
     )
 """)
 
 # Insert new data for new and changed dates
 con.execute("""
     INSERT INTO working_times
-    SELECT * FROM temp_working_times
+    SELECT *, NULL as delete FROM temp_working_times
     WHERE "Datum" IN (
         SELECT "Datum" FROM temp_new_dates
         UNION
@@ -124,9 +121,6 @@ con.execute("""
     )
 """)
 
-# Set delete flag
-# TODO
-
 # Verify the data was imported
 count = con.execute("SELECT COUNT(*) FROM working_times").fetchone()[0]
 print(f"Successfully imported data. Total records in database: {count}")