promato_analyse/analyze_data.py
2025-04-18 22:34:10 +02:00

157 lines
4.7 KiB
Python

import duckdb
import pandas as pd
from datetime import datetime
# Open the DuckDB database file that every query in this script reads from.
try:
    con = duckdb.connect('working_times.db')
    print("Connected to working_times.db")
except Exception as e:
    # Top-level boundary of the script: report the failure and stop with a
    # non-zero exit status. SystemExit is raised directly because the bare
    # exit() helper is injected by the `site` module and is not guaranteed to
    # exist (e.g. with `python -S` or in frozen/embedded interpreters).
    print(f"Error connecting to database: {e}")
    raise SystemExit(1)
# Timestamp of this analysis run (naive local time); it is printed in both
# the report header and the report footer.
analysis_timestamp = datetime.now()
# Function to format hours
def format_hours(hours):
    """Return *hours* as text with two decimals and an ``h`` suffix.

    ``None`` (for example a NULL aggregate result fetched from the database)
    is rendered as ``0.00h`` instead of raising ``TypeError``.
    """
    if hours is None:
        hours = 0
    return f"{hours:.2f}h"
# Function to format days
def format_days(days):
    """Return *days* as text with two decimals and a ``d`` suffix.

    ``None`` (for example a NULL aggregate result fetched from the database)
    is rendered as ``0.00d`` instead of raising ``TypeError``.
    """
    if days is None:
        days = 0
    return f"{days:.2f}d"
# Earliest and latest date present in daily_summary; these bound the report.
date_range = con.execute("""
SELECT MIN(date) AS start_date, MAX(date) AS end_date
FROM daily_summary
""").fetchone()
start_date, end_date = date_range
# Most recent transformation and source-import timestamps recorded in
# daily_summary, shown in the header so the reader can tell how fresh the
# underlying data is.
transform_info = con.execute("""
SELECT
MAX(transform_timestamp) AS transform_timestamp,
MAX(source_import_timestamp) AS source_import_timestamp
FROM daily_summary
""").fetchone()
transform_timestamp, source_import_timestamp = transform_info
# Report header: one print call, one line per argument.
header_rule = "=" * 60
print(
    "\n" + header_rule,
    f"WORKING TIME ANALYSIS: {start_date} to {end_date}",
    f"ANALYSIS TIMESTAMP: {analysis_timestamp}",
    f"DATA TRANSFORMATION: {transform_timestamp}",
    f"DATA IMPORT: {source_import_timestamp}",
    header_rule,
    sep="\n",
)
# Get the total hours and days worked across all projects
totals = con.execute("""
SELECT SUM(total_hours) AS total_hours, SUM(total_days) AS total_days
FROM project_summary
""").fetchone()
# SUM() yields NULL (None in Python) when project_summary has no rows or only
# NULL values; fall back to 0 so the arithmetic and number formatting below
# do not raise TypeError on an empty database.
total_hours = totals[0] or 0
total_days = totals[1] or 0
# Only days with recorded hours count as working days.
num_working_days = con.execute("SELECT COUNT(*) FROM daily_summary WHERE total_hours > 0").fetchone()[0]
# Avoid dividing by zero when no day has any recorded hours.
avg_hours_per_day = total_hours / num_working_days if num_working_days > 0 else 0
print(f"\nTOTAL HOURS: {format_hours(total_hours)}")
print(f"TOTAL DAYS: {format_days(total_days)}")
print(f"WORKING DAYS: {num_working_days}")
print(f"AVG HOURS PER WORKING DAY: {format_hours(avg_hours_per_day)}")
# Get the top projects by hours
top_projects = con.execute("""
SELECT project_name, total_hours, total_days, days_worked
FROM project_summary
ORDER BY total_hours DESC
LIMIT 5
""").fetchall()
print("\n" + "-"*60)
print("TOP 5 PROJECTS BY HOURS")
print("-"*60)
for i, (project, hours, days, worked_days) in enumerate(top_projects, 1):
    # The overall total may be 0 (or None) even when project rows exist;
    # report a 0% share in that case instead of failing on the division.
    percent = (hours / total_hours) * 100 if total_hours else 0
    # Apply the zero-day guard to the value, not to the formatted text, so
    # the fallback is rendered as "0.00h" like every other hour figure
    # (previously a bare "0" was printed).
    daily_average = hours / worked_days if worked_days > 0 else 0
    print(f"{i}. {project}")
    print(f" {format_hours(hours)} ({percent:.1f}% of total) / {format_days(days)}")
    print(f" Across {worked_days} days, daily average: {format_hours(daily_average)}")
# Five days with the most recorded hours; days without hours are excluded.
busiest_days = con.execute("""
SELECT date, total_hours, project_count
FROM daily_summary
WHERE total_hours > 0
ORDER BY total_hours DESC
LIMIT 5
""").fetchall()
section_rule = "-" * 60
print("\n" + section_rule, "TOP 5 BUSIEST DAYS", section_rule, sep="\n")
for rank, row in enumerate(busiest_days, start=1):
    day, worked_hours, n_projects = row
    # Convert the day's hours into day equivalents by dividing by 8.
    as_days = worked_hours / 8
    print(f"{rank}. {day}: {format_hours(worked_hours)} ({format_days(as_days)}) across {n_projects} projects")
# Bucket every working day (total_hours > 0) by its total hours and count the
# days per bucket; the ORDER BY maps each label to a rank so the buckets come
# back in ascending hour order rather than alphabetically.
day_distribution = con.execute("""
SELECT
CASE
WHEN total_hours <= 4 THEN '0-4 hours'
WHEN total_hours <= 6 THEN '4-6 hours'
WHEN total_hours <= 8 THEN '6-8 hours'
WHEN total_hours <= 10 THEN '8-10 hours'
ELSE '10+ hours'
END AS hour_range,
COUNT(*) as day_count
FROM daily_summary
WHERE total_hours > 0
GROUP BY hour_range
ORDER BY
CASE
WHEN hour_range = '0-4 hours' THEN 1
WHEN hour_range = '4-6 hours' THEN 2
WHEN hour_range = '6-8 hours' THEN 3
WHEN hour_range = '8-10 hours' THEN 4
ELSE 5
END
""").fetchall()
section_rule = "-" * 60
print("\n" + section_rule, "DAY DISTRIBUTION", section_rule, sep="\n")
for bucket, n_days in day_distribution:
    # Share of all working days that fall into this bucket, in percent.
    share = n_days / num_working_days * 100
    print(f"{bucket}: {n_days} days ({share:.1f}%)")
# Print an overview of project/activity combinations (ten largest by hours)
project_activity_combo = con.execute("""
SELECT
project_name,
activity_type,
SUM(total_hours) as hours,
SUM(total_days) as days
FROM working_times_summary
GROUP BY project_name, activity_type
ORDER BY hours DESC
LIMIT 10
""").fetchall()
print("\n" + "-"*60)
print("TOP 10 PROJECT-ACTIVITY COMBINATIONS")
print("-"*60)
for project, activity, hours, days in project_activity_combo:
    # total_hours is computed from project_summary, a different table than
    # the one queried here, so it can be 0 (or None) even when this loop has
    # rows; report a 0% share instead of failing on the division.
    percent = (hours / total_hours) * 100 if total_hours else 0
    print(f"{project} - {activity}: {format_hours(hours)} ({format_days(days)}, {percent:.1f}%)")
# Report footer, repeating the timestamp captured at the start of the run.
footer_rule = "=" * 60
print(
    "\n" + footer_rule,
    f"END OF ANALYSIS - Generated at {analysis_timestamp}",
    footer_rule,
    sep="\n",
)
# Release the database connection now that all queries have run.
con.close()