118 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			118 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import duckdb
 | |
| import pandas as pd
 | |
| import os
 | |
| import time
 | |
| import datetime
 | |
| 
 | |
# Open the DuckDB database file, retrying with a linearly growing delay
# (1s, 2s, 3s, ...) when the connection attempt fails.
# The connection is opened read-write: this script creates tables through
# it, so a read-only connection is not an option here.
max_retries = 5

print("Trying to connect to working_times.db...")

for attempt in range(1, max_retries + 1):
    try:
        con = duckdb.connect('working_times.db')
    except Exception as e:
        print(f"Connection attempt {attempt} failed: {e}")
        if attempt == max_retries:
            print("Maximum retries reached. Exiting.")
            # Raise SystemExit directly instead of calling exit(): the
            # latter is a helper injected by the site module and is not
            # guaranteed to be available in every interpreter setup.
            raise SystemExit(1)
        print(f"Retrying in {attempt} seconds...")
        time.sleep(attempt)
    else:
        print("Connected to working_times.db")
        break

print("Transforming data...")

# Timestamp of this transformation run; it is embedded as a quoted literal
# in the summary tables so each row records when it was produced.
transform_timestamp = datetime.datetime.now()
print(f"Transform timestamp: {transform_timestamp}")

# Rebuild working_times_summary: total hours (and hours / 8 as "days")
# per date, project and activity type.
# CREATE OR REPLACE is a single statement, so if the SELECT fails the
# previously built table is not dropped beforehand (unlike a separate
# DROP TABLE followed by CREATE TABLE).
con.execute(f"""
    CREATE OR REPLACE TABLE working_times_summary AS
    SELECT
        Datum AS date,
        Projektname AS project_name,
        "Leistungsart (Bezeichnung)" AS activity_type,
        SUM("Zeit [h]") AS total_hours,
        SUM("Zeit [h]"/8) AS total_days,
        '{transform_timestamp}' AS transform_timestamp
    FROM working_times
    GROUP BY date, project_name, activity_type
    ORDER BY date, project_name, activity_type;
""")

# Rebuild project_summary: per-project totals (hours, hours / 8 as "days",
# number of distinct dates with entries) plus the newest import timestamp
# of the source rows, ordered by total hours descending.
# CREATE OR REPLACE is a single statement, so if the SELECT fails the
# previously built table is not dropped beforehand (unlike a separate
# DROP TABLE followed by CREATE TABLE).
con.execute(f"""
    CREATE OR REPLACE TABLE project_summary AS
    SELECT
        Projektname AS project_name,
        SUM("Zeit [h]") AS total_hours,
        SUM("Zeit [h]"/8) AS total_days,
        COUNT(DISTINCT Datum) AS days_worked,
        MAX(import_timestamp) AS source_import_timestamp,
        '{transform_timestamp}' AS transform_timestamp
    FROM working_times
    GROUP BY project_name
    ORDER BY total_hours DESC;
""")

# Rebuild daily_summary: per-date total hours, number of entries, number
# of distinct projects, plus the newest import timestamp of the source rows.
# CREATE OR REPLACE is a single statement, so if the SELECT fails the
# previously built table is not dropped beforehand (unlike a separate
# DROP TABLE followed by CREATE TABLE).
con.execute(f"""
    CREATE OR REPLACE TABLE daily_summary AS
    SELECT
        Datum AS date,
        SUM("Zeit [h]") AS total_hours,
        COUNT(*) AS entry_count,
        COUNT(DISTINCT Projektname) AS project_count,
        MAX(import_timestamp) AS source_import_timestamp,
        '{transform_timestamp}' AS transform_timestamp
    FROM working_times
    GROUP BY date
    ORDER BY date;
""")

# Sanity check: count the rows of every derived table first, then report
# them, in the order the tables were built.
derived_tables = ("working_times_summary", "project_summary", "daily_summary")
row_counts = {
    table_name: con.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
    for table_name in derived_tables
}

for table_name, n_rows in row_counts.items():
    print(f"Successfully created {n_rows} records in {table_name} table.")

# Show up to five rows from the detailed summary and from the project
# summary; each preview is a heading followed by one tuple per line.
previews = [
    (
        "\nSample of working_times_summary table:",
        "SELECT date, project_name, activity_type, total_hours, total_days, transform_timestamp FROM working_times_summary LIMIT 5",
    ),
    (
        "\nProject summary (top 5 by hours):",
        "SELECT project_name, total_hours, total_days, days_worked, transform_timestamp FROM project_summary LIMIT 5",
    ),
]

for heading, preview_sql in previews:
    print(heading)
    preview_rows = con.execute(preview_sql).fetchall()
    for record in preview_rows:
        print(record)

# Grand totals across all projects, fetched in a single query.
# SUM() over an empty project_summary yields NULL (None in Python), which
# cannot be formatted with :.2f, so fall back to 0.0 in that case.
hours_sum, days_sum = con.execute(
    "SELECT SUM(total_hours), SUM(total_days) FROM project_summary"
).fetchone()
total_hours = hours_sum if hours_sum is not None else 0.0
total_days = days_sum if days_sum is not None else 0.0
print(f"\nTotal hours worked: {total_hours:.2f}")
print(f"Total days worked: {total_days:.2f}")

# Close the connection
con.close()

print("\nData transformation complete.")