implemented updates
This commit is contained in:
parent
56d49c71cf
commit
cc6ff9325d
126
import_data.py
126
import_data.py
@ -20,38 +20,116 @@ print(f"Importing data from {csv_file}...")
|
||||
import_timestamp = datetime.datetime.now()
|
||||
print(f"Import timestamp: {import_timestamp}")
|
||||
|
||||
# First, create a temporary table with the CSV data
|
||||
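# Create temporary table with the CSV data plus an import timestamp and a per-row MD5 hash
# NOTE(review): the hash input is built with `||`; in DuckDB a NULL operand appears to make the whole concatenation NULL, so row_hash would be NULL for rows with any empty column - confirm, and consider COALESCE or concat_ws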
|
||||
con.execute("""
|
||||
CREATE TABLE IF NOT EXISTS temp_working_times AS
|
||||
SELECT * FROM read_csv_auto(
|
||||
'{csv_file}',
|
||||
delim=';',
|
||||
header=true,
|
||||
ignore_errors=true,
|
||||
sample_size=1000,
|
||||
auto_detect=true,
|
||||
decimal_separator=','
|
||||
CREATE TEMP TABLE IF NOT EXISTS temp_working_times AS
|
||||
WITH base_data AS (
|
||||
SELECT
|
||||
*,
|
||||
'{timestamp}'::TIMESTAMP AS import_timestamp
|
||||
FROM read_csv_auto(
|
||||
'{csv_file}',
|
||||
delim=';',
|
||||
header=true,
|
||||
ignore_errors=true,
|
||||
sample_size=1000,
|
||||
auto_detect=true,
|
||||
decimal_separator=','
|
||||
)
|
||||
)
|
||||
""".format(csv_file=csv_file))
|
||||
|
||||
# Drop the existing table if it exists
|
||||
con.execute("DROP TABLE IF EXISTS working_times")
|
||||
|
||||
# Now create the final table with the timestamp column
|
||||
con.execute("""
|
||||
CREATE TABLE working_times AS
|
||||
SELECT
|
||||
*,
|
||||
'{timestamp}' AS import_timestamp
|
||||
FROM temp_working_times
|
||||
""".format(timestamp=import_timestamp))
|
||||
md5(
|
||||
"Datum" || '|' ||
|
||||
"Von" || '|' ||
|
||||
"Bis" || '|' ||
|
||||
"Pause" || '|' ||
|
||||
"Gebucht von" || '|' ||
|
||||
"Gebucht am" || '|' ||
|
||||
"Name" || '|' ||
|
||||
"Kennung" || '|' ||
|
||||
"Projekt-PSP-ID" || '|' ||
|
||||
"Projektname" || '|' ||
|
||||
"PSP-ID" || '|' ||
|
||||
"PSP-Name" || '|' ||
|
||||
"ProMaTo-Projekt-Nr." || '|' ||
|
||||
"ProMaTo-Projekt" || '|' ||
|
||||
"Kategorie" || '|' ||
|
||||
"Gruppenname" || '|' ||
|
||||
"Jira-Epic" || '|' ||
|
||||
"Aufgaben-Nr." || '|' ||
|
||||
"Aufgabenname" || '|' ||
|
||||
"Leistungsart" || '|' ||
|
||||
"Leistungsart (Bezeichnung)" || '|' ||
|
||||
"Zeit [h]" || '|' ||
|
||||
"Kommentar"
|
||||
) AS row_hash
|
||||
FROM base_data
|
||||
""".format(csv_file=csv_file, timestamp=import_timestamp))
|
||||
|
||||
# Drop the temporary table
|
||||
con.execute("DROP TABLE IF EXISTS temp_working_times")
|
||||
# Create the working_times table if it doesn't exist
|
||||
con.execute("""
|
||||
CREATE TABLE IF NOT EXISTS working_times AS
|
||||
SELECT * FROM temp_working_times LIMIT 0
|
||||
""")
|
||||
|
||||
# Add column for delete flag if it doesn't exist
|
||||
con.execute("""
|
||||
ALTER TABLE working_times
|
||||
ADD COLUMN IF NOT EXISTS delete BOOLEAN DEFAULT NULL
|
||||
""")
|
||||
|
||||
# Get all unique dates from the new data
|
||||
con.execute("""
|
||||
CREATE TEMP TABLE temp_import_dates AS
|
||||
SELECT DISTINCT "Datum" FROM temp_working_times
|
||||
""")
|
||||
|
||||
# Get all unique dates from existing data (TODO: ignore deleted entries; the query below does not filter on the delete flag yet)
|
||||
con.execute("""
|
||||
CREATE TEMP TABLE temp_existing_dates AS
|
||||
SELECT DISTINCT "Datum" FROM working_times
|
||||
""")
|
||||
|
||||
# Find dates that are new (not in existing data)
|
||||
con.execute("""
|
||||
CREATE TEMP TABLE temp_new_dates AS
|
||||
SELECT t."Datum"
|
||||
FROM temp_import_dates t
|
||||
LEFT JOIN temp_existing_dates e ON t."Datum" = e."Datum"
|
||||
WHERE e."Datum" IS NULL
|
||||
""")
|
||||
|
||||
# Find dates that exist in both tables and have at least one imported row whose row_hash matches no existing row for that date
|
||||
con.execute("""
|
||||
CREATE TEMP TABLE temp_changed_dates AS
|
||||
SELECT DISTINCT t."Datum"
|
||||
FROM temp_working_times t
|
||||
JOIN working_times e ON t."Datum" = e."Datum"
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM working_times e2
|
||||
WHERE e2."Datum" = t."Datum"
|
||||
AND e2.row_hash = t.row_hash
|
||||
)
|
||||
""")
|
||||
|
||||
# Insert all imported rows for new and changed dates (for changed dates this includes rows whose hash already exists in working_times)
|
||||
con.execute("""
|
||||
INSERT INTO working_times
|
||||
SELECT * FROM temp_working_times
|
||||
WHERE "Datum" IN (
|
||||
SELECT "Datum" FROM temp_new_dates
|
||||
UNION
|
||||
SELECT "Datum" FROM temp_changed_dates
|
||||
)
|
||||
""")
|
||||
|
||||
# Set delete flag
|
||||
# TODO
|
||||
|
||||
# Verify the data was imported
|
||||
count = con.execute("SELECT COUNT(*) FROM working_times").fetchone()[0]
|
||||
print(f"Successfully imported {count} records into the working_times table.")
|
||||
print(f"Successfully imported data. Total records in database: {count}")
|
||||
|
||||
# Show the table schema
|
||||
print("\nTable Schema:")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user