# Python script, 65 lines, 1.7 KiB (file-viewer header captured with the source; not part of the program).
"""Import a semicolon-delimited working-times CSV export into DuckDB.

The script rebuilds the ``working_times`` table in ``working_times.db`` from
the CSV file and adds an ``import_timestamp`` column holding the time at
which the import ran. It then prints the row count and the table schema.
"""

import datetime
import os
import sys

import duckdb
import pandas as pd  # noqa: F401 -- not used below; kept from the original import list


def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a value such as a file path
    containing ``'`` cannot terminate the literal early.
    """
    return "'" + str(value).replace("'", "''") + "'"


# Database file that receives the imported data
db_file = 'working_times.db'

# Path to the CSV file
csv_file = 'data/lawi-2025-04-01-2025-04-30-2025-04-17.csv'

# Check the input before touching the database, so a missing CSV does not
# leave behind an empty database file or an open connection.
if not os.path.isfile(csv_file):
    print(f"Error: File {csv_file} not found")
    sys.exit(1)

print(f"Importing data from {csv_file}...")

# Current timestamp for the import (local time, no timezone attached)
import_timestamp = datetime.datetime.now()
print(f"Import timestamp: {import_timestamp}")

# Create connection to DuckDB
con = duckdb.connect(db_file)
try:
    # Stage the CSV data. OR REPLACE (rather than IF NOT EXISTS) guarantees
    # that a staging table left over from an interrupted run is never reused.
    # NOTE: ignore_errors=true makes the reader skip rows it cannot parse.
    con.execute(f"""
        CREATE OR REPLACE TABLE temp_working_times AS
        SELECT * FROM read_csv_auto(
            {_sql_literal(csv_file)},
            delim=';',
            header=true,
            ignore_errors=true,
            sample_size=1000,
            auto_detect=true,
            decimal_separator=','
        )
    """)

    # Build the final table with the timestamp column. CREATE OR REPLACE swaps
    # the table in one statement, so the previous working_times table is not
    # lost if this step fails.
    # NOTE(review): the string literal makes import_timestamp a VARCHAR column;
    # kept that way to preserve the existing schema. Wrap it in
    # CAST(... AS TIMESTAMP) if a typed column is wanted.
    con.execute(f"""
        CREATE OR REPLACE TABLE working_times AS
        SELECT
            *,
            {_sql_literal(import_timestamp)} AS import_timestamp
        FROM temp_working_times
    """)

    # Drop the staging table
    con.execute("DROP TABLE IF EXISTS temp_working_times")

    # Verify the data was imported
    count = con.execute("SELECT COUNT(*) FROM working_times").fetchone()[0]
    print(f"Successfully imported {count} records into the working_times table.")

    # Show the table schema (column name and column type of each DESCRIBE row)
    print("\nTable Schema:")
    schema = con.execute("DESCRIBE working_times").fetchall()
    for col in schema:
        print(f"{col[0]}: {col[1]}")
finally:
    # Close the connection even when one of the statements above raised
    con.close()

print(f"\nData import complete. Database saved to {db_file}")