118 lines
4.2 KiB
Python
118 lines
4.2 KiB
Python
import duckdb
|
|
import pandas as pd
|
|
import os
|
|
import time
|
|
import datetime
|
|
|
|
# Open the DuckDB database file, retrying with a linearly growing delay
# (1s, 2s, 3s, ...) when the connection attempt fails.
#
# The connection is opened in DuckDB's default read-write mode: the rest of
# this script drops and creates tables, so a read-only connection would not
# work here.
max_retries = 5
retry_count = 0
connected = False

print("Trying to connect to working_times.db...")

while not connected and retry_count < max_retries:
    try:
        con = duckdb.connect('working_times.db')
    except Exception as e:
        retry_count += 1
        print(f"Connection attempt {retry_count} failed: {e}")
        if retry_count >= max_retries:
            print("Maximum retries reached. Exiting.")
            # Raise SystemExit directly instead of calling exit(): the
            # exit() helper is installed by the `site` module for
            # interactive use and is not guaranteed to exist (python -S,
            # frozen executables).
            raise SystemExit(1)
        print(f"Retrying in {retry_count} seconds...")
        time.sleep(retry_count)
    else:
        # Only reached when connect() did not raise.
        connected = True
        print("Connected to working_times.db")
|
|
|
|
print("Transforming data...")

# Take a single local (naive) wall-clock reading so that every statement
# that embeds the timestamp uses the exact same value.
transform_timestamp = datetime.datetime.now()
print("Transform timestamp: {}".format(transform_timestamp))
|
|
|
|
# Rebuild working_times_summary from scratch: one row per
# (date, project, activity type) holding the summed hours, the same sum
# expressed in days (hours / 8), and the timestamp of this run.
#
# The run timestamp is interpolated into the SQL as a quoted string
# literal; the GROUP BY / ORDER BY clauses refer to the SELECT aliases.
working_times_summary_sql = f"""
    DROP TABLE IF EXISTS working_times_summary;
    CREATE TABLE working_times_summary AS
    SELECT
        Datum AS date,
        Projektname AS project_name,
        "Leistungsart (Bezeichnung)" AS activity_type,
        SUM("Zeit [h]") AS total_hours,
        SUM("Zeit [h]"/8) AS total_days,
        '{transform_timestamp}' AS transform_timestamp
    FROM working_times
    GROUP BY date, project_name, activity_type
    ORDER BY date, project_name, activity_type;
"""
con.execute(working_times_summary_sql)
|
|
|
|
# Rebuild project_summary from scratch: one row per project with summed
# hours, the same sum in days (hours / 8), the number of distinct dates
# that have entries, the latest import_timestamp seen for the project, and
# the timestamp of this run. Rows are inserted in descending total_hours
# order.
project_summary_sql = f"""
    DROP TABLE IF EXISTS project_summary;
    CREATE TABLE project_summary AS
    SELECT
        Projektname AS project_name,
        SUM("Zeit [h]") AS total_hours,
        SUM("Zeit [h]"/8) AS total_days,
        COUNT(DISTINCT Datum) AS days_worked,
        MAX(import_timestamp) AS source_import_timestamp,
        '{transform_timestamp}' AS transform_timestamp
    FROM working_times
    GROUP BY project_name
    ORDER BY total_hours DESC;
"""
con.execute(project_summary_sql)
|
|
|
|
# Rebuild daily_summary from scratch: one row per date with summed hours,
# the number of source rows, the number of distinct projects, the latest
# import_timestamp seen for that date, and the timestamp of this run.
daily_summary_sql = f"""
    DROP TABLE IF EXISTS daily_summary;
    CREATE TABLE daily_summary AS
    SELECT
        Datum AS date,
        SUM("Zeit [h]") AS total_hours,
        COUNT(*) AS entry_count,
        COUNT(DISTINCT Projektname) AS project_count,
        MAX(import_timestamp) AS source_import_timestamp,
        '{transform_timestamp}' AS transform_timestamp
    FROM working_times
    GROUP BY date
    ORDER BY date;
"""
con.execute(daily_summary_sql)
|
|
|
|
# Sanity check: count the rows in each freshly built table, then report
# the three counts in the same order the tables were created.
row_counts = {
    table: con.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
    for table in ("working_times_summary", "project_summary", "daily_summary")
}

# Keep the individual counts available under their original names.
summary_count = row_counts["working_times_summary"]
project_count = row_counts["project_summary"]
daily_count = row_counts["daily_summary"]

for table, count in row_counts.items():
    print(f"Successfully created {count} records in {table} table.")
|
|
|
|
# Show up to five rows of working_times_summary as a quick visual check.
# Each row is printed as the raw tuple returned by the driver.
print("\nSample of working_times_summary table:")
summary_sample_sql = "SELECT date, project_name, activity_type, total_hours, total_days, transform_timestamp FROM working_times_summary LIMIT 5"
summary_sample = con.execute(summary_sample_sql).fetchall()
for record in summary_sample:
    print(record)
|
|
|
|
# Show the five projects with the most hours.
#
# The ORDER BY is explicit: without it, LIMIT 5 returns rows in whatever
# order the table scan yields, and the "top 5 by hours" heading would hold
# only if that order happened to match the order in which the table was
# populated.
print("\nProject summary (top 5 by hours):")
project_sample = con.execute(
    "SELECT project_name, total_hours, total_days, days_worked, transform_timestamp "
    "FROM project_summary "
    "ORDER BY total_hours DESC "
    "LIMIT 5"
).fetchall()
for row in project_sample:
    print(row)
|
|
|
|
# Report the grand totals across all projects, then release the database
# connection.
#
# The close happens in `finally` so the database file is released even if
# the totals query or the formatting below raises.
try:
    total_hours, total_days = con.execute(
        "SELECT SUM(total_hours), SUM(total_days) FROM project_summary"
    ).fetchone()

    # SUM over zero rows (or over all-NULL values) is NULL, which arrives
    # here as None; formatting None with ':.2f' would raise TypeError.
    if total_hours is None:
        total_hours = 0.0
    if total_days is None:
        total_days = 0.0

    print(f"\nTotal hours worked: {total_hours:.2f}")
    print(f"Total days worked: {total_days:.2f}")
finally:
    # Close the connection
    con.close()

print("\nData transformation complete.")