Use a pandas dataframe for encoding
This commit is contained in:
parent
cc6ff9325d
commit
a048b724ea
@ -20,22 +20,18 @@ print(f"Importing data from {csv_file}...")
|
|||||||
import_timestamp = datetime.datetime.now()
|
import_timestamp = datetime.datetime.now()
|
||||||
print(f"Import timestamp: {import_timestamp}")
|
print(f"Import timestamp: {import_timestamp}")
|
||||||
|
|
||||||
|
# First read the CSV file with pandas to handle encoding
|
||||||
|
print("Reading CSV file with proper encoding...")
|
||||||
|
df = pd.read_csv(csv_file, sep=';', encoding='ISO-8859-1', decimal=',')
|
||||||
|
|
||||||
# Create temporary table with CSV data, import timestamp, and hash
|
# Create temporary table with CSV data, import timestamp, and hash
|
||||||
con.execute("""
|
con.execute("""
|
||||||
CREATE TEMP TABLE IF NOT EXISTS temp_working_times AS
|
CREATE TEMP TABLE temp_working_times AS
|
||||||
WITH base_data AS (
|
WITH base_data AS (
|
||||||
SELECT
|
SELECT
|
||||||
*,
|
*,
|
||||||
'{timestamp}'::TIMESTAMP AS import_timestamp
|
'{timestamp}'::TIMESTAMP AS import_timestamp
|
||||||
FROM read_csv_auto(
|
FROM df
|
||||||
'{csv_file}',
|
|
||||||
delim=';',
|
|
||||||
header=true,
|
|
||||||
ignore_errors=true,
|
|
||||||
sample_size=1000,
|
|
||||||
auto_detect=true,
|
|
||||||
decimal_separator=','
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
*,
|
*,
|
||||||
@ -65,7 +61,7 @@ con.execute("""
|
|||||||
"Kommentar"
|
"Kommentar"
|
||||||
) AS row_hash
|
) AS row_hash
|
||||||
FROM base_data
|
FROM base_data
|
||||||
""".format(csv_file=csv_file, timestamp=import_timestamp))
|
""".format(timestamp=import_timestamp))
|
||||||
|
|
||||||
# Create the working_times table if it doesn't exist
|
# Create the working_times table if it doesn't exist
|
||||||
con.execute("""
|
con.execute("""
|
||||||
@ -85,7 +81,7 @@ con.execute("""
|
|||||||
SELECT DISTINCT "Datum" FROM temp_working_times
|
SELECT DISTINCT "Datum" FROM temp_working_times
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Get all unique dates from existing data, ignore deleted entries
|
# Get all unique dates from existing data
|
||||||
con.execute("""
|
con.execute("""
|
||||||
CREATE TEMP TABLE temp_existing_dates AS
|
CREATE TEMP TABLE temp_existing_dates AS
|
||||||
SELECT DISTINCT "Datum" FROM working_times
|
SELECT DISTINCT "Datum" FROM working_times
|
||||||
@ -107,16 +103,17 @@ con.execute("""
|
|||||||
FROM temp_working_times t
|
FROM temp_working_times t
|
||||||
JOIN working_times e ON t."Datum" = e."Datum"
|
JOIN working_times e ON t."Datum" = e."Datum"
|
||||||
WHERE NOT EXISTS (
|
WHERE NOT EXISTS (
|
||||||
SELECT 1 FROM working_times e2
|
SELECT 1
|
||||||
WHERE e2."Datum" = t."Datum"
|
FROM working_times
|
||||||
AND e2.row_hash = t.row_hash
|
WHERE "Datum" = t."Datum"
|
||||||
|
AND row_hash = t.row_hash
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Insert new data for new and changed dates
|
# Insert new data for new and changed dates
|
||||||
con.execute("""
|
con.execute("""
|
||||||
INSERT INTO working_times
|
INSERT INTO working_times
|
||||||
SELECT * FROM temp_working_times
|
SELECT *, NULL as delete FROM temp_working_times
|
||||||
WHERE "Datum" IN (
|
WHERE "Datum" IN (
|
||||||
SELECT "Datum" FROM temp_new_dates
|
SELECT "Datum" FROM temp_new_dates
|
||||||
UNION
|
UNION
|
||||||
@ -124,9 +121,6 @@ con.execute("""
|
|||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Set delete flag
|
|
||||||
# TODO
|
|
||||||
|
|
||||||
# Verify the data was imported
|
# Verify the data was imported
|
||||||
count = con.execute("SELECT COUNT(*) FROM working_times").fetchone()[0]
|
count = con.execute("SELECT COUNT(*) FROM working_times").fetchone()[0]
|
||||||
print(f"Successfully imported data. Total records in database: {count}")
|
print(f"Successfully imported data. Total records in database: {count}")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user