"""Print a working-time analysis report from the working_times.db DuckDB database."""

import sys
from datetime import datetime

import duckdb
import pandas as pd

# Connect to the database
try:
    con = duckdb.connect('working_times.db')
except Exception as e:
    # Top-level boundary of the script: report the failure on stderr and stop
    # with a non-zero status. sys.exit is used instead of the bare exit()
    # helper, which is provided by the site module for interactive sessions.
    print(f"Error connecting to database: {e}", file=sys.stderr)
    sys.exit(1)
else:
    # Kept outside the try body so only the connect call itself is guarded.
    print("Connected to working_times.db")

# Get the current analysis timestamp (printed in the report header and footer)
analysis_timestamp = datetime.now()

def format_hours(hours):
    """Format an hour count with two decimals and an ``h`` suffix.

    Args:
        hours: Numeric number of hours, or ``None`` (for example a SQL
            ``SUM`` that produced NULL), which is rendered as zero.

    Returns:
        The formatted string, e.g. ``"7.50h"``.
    """
    # None cannot be formatted with ".2f"; treat a missing value as zero.
    if hours is None:
        hours = 0
    return f"{hours:.2f}h"


def format_days(days):
    """Format a day count with two decimals and a ``d`` suffix.

    Args:
        days: Numeric number of days, or ``None`` (for example a SQL
            ``SUM`` that produced NULL), which is rendered as zero.

    Returns:
        The formatted string, e.g. ``"1.25d"``.
    """
    # None cannot be formatted with ".2f"; treat a missing value as zero.
    if days is None:
        days = 0
    return f"{days:.2f}d"


# Find the earliest and latest dates present in daily_summary
date_range = con.execute("""
    SELECT MIN(date) AS start_date, MAX(date) AS end_date
    FROM daily_summary
""").fetchone()

# The single result row is (start_date, end_date)
start_date, end_date = date_range

# Look up the most recent transformation and source-import timestamps
transform_info = con.execute("""
    SELECT
        MAX(transform_timestamp) AS transform_timestamp,
        MAX(source_import_timestamp) AS source_import_timestamp
    FROM daily_summary
""").fetchone()

# The single result row is (transform_timestamp, source_import_timestamp)
transform_timestamp, source_import_timestamp = transform_info

# Report banner: covered period plus analysis, transformation and import times
header_rule = "=" * 60
print(f"\n{header_rule}")
print(f"WORKING TIME ANALYSIS: {start_date} to {end_date}")
print(f"ANALYSIS TIMESTAMP: {analysis_timestamp}")
print(f"DATA TRANSFORMATION: {transform_timestamp}")
print(f"DATA IMPORT: {source_import_timestamp}")
print(header_rule)

# Get the total hours and days worked
totals = con.execute("""
    SELECT SUM(total_hours) AS total_hours, SUM(total_days) AS total_days
    FROM project_summary
""").fetchone()

# SUM() yields NULL (None in Python) when project_summary has no rows to add
# up; coalesce to 0 so the division and the formatting below cannot fail.
total_hours = totals[0] if totals[0] is not None else 0
total_days = totals[1] if totals[1] is not None else 0

# Only days with recorded hours count as working days
num_working_days = con.execute(
    "SELECT COUNT(*) FROM daily_summary WHERE total_hours > 0"
).fetchone()[0]

# Guard the average against a data set without any working day
avg_hours_per_day = total_hours / num_working_days if num_working_days > 0 else 0

print(f"\nTOTAL HOURS: {format_hours(total_hours)}")
print(f"TOTAL DAYS: {format_days(total_days)}")
print(f"WORKING DAYS: {num_working_days}")
print(f"AVG HOURS PER WORKING DAY: {format_hours(avg_hours_per_day)}")

# Get the top projects by hours
top_projects = con.execute("""
    SELECT project_name, total_hours, total_days, days_worked
    FROM project_summary
    ORDER BY total_hours DESC
    LIMIT 5
""").fetchall()

print("\n" + "-"*60)
print("TOP 5 PROJECTS BY HOURS")
print("-"*60)
for i, (project, hours, days, worked_days) in enumerate(top_projects, 1):
    # Share of the overall total; 0 when the total is missing or zero so an
    # all-zero summary does not raise ZeroDivisionError.
    percent = (hours / total_hours) * 100 if total_hours else 0
    # Average per day worked; 0 when no worked days are recorded. The value is
    # always passed through format_hours so the fallback prints as hours too.
    daily_average = hours / worked_days if worked_days and worked_days > 0 else 0
    print(f"{i}. {project}")
    print(f" {format_hours(hours)} ({percent:.1f}% of total) / {format_days(days)}")
    print(f" Across {worked_days} days, daily average: {format_hours(daily_average)}")

# Fetch the five days with the most recorded hours
busiest_days = con.execute("""
    SELECT date, total_hours, project_count
    FROM daily_summary
    WHERE total_hours > 0
    ORDER BY total_hours DESC
    LIMIT 5
""").fetchall()

busiest_rule = "-" * 60
print(f"\n{busiest_rule}")
print("TOP 5 BUSIEST DAYS")
print(busiest_rule)
for rank, (date, hours, project_count) in enumerate(busiest_days, 1):
    # Day equivalent: the day's hours divided by 8
    print(f"{rank}. {date}: {format_hours(hours)} ({format_days(hours / 8)}) across {project_count} projects")

# Count working days per bucket of daily hours, ordered from shortest to longest
day_distribution = con.execute("""
    SELECT
        CASE
            WHEN total_hours <= 4 THEN '0-4 hours'
            WHEN total_hours <= 6 THEN '4-6 hours'
            WHEN total_hours <= 8 THEN '6-8 hours'
            WHEN total_hours <= 10 THEN '8-10 hours'
            ELSE '10+ hours'
        END AS hour_range,
        COUNT(*) as day_count
    FROM daily_summary
    WHERE total_hours > 0
    GROUP BY hour_range
    ORDER BY
        CASE
            WHEN hour_range = '0-4 hours' THEN 1
            WHEN hour_range = '4-6 hours' THEN 2
            WHEN hour_range = '6-8 hours' THEN 3
            WHEN hour_range = '8-10 hours' THEN 4
            ELSE 5
        END
""").fetchall()

distribution_rule = "-" * 60
print(f"\n{distribution_rule}")
print("DAY DISTRIBUTION")
print(distribution_rule)
for bucket_label, bucket_days in day_distribution:
    # Share of all working days that fall into this bucket
    bucket_share = (bucket_days / num_working_days) * 100
    print(f"{bucket_label}: {bucket_days} days ({bucket_share:.1f}%)")

# Print an overview of project/activity combinations
project_activity_combo = con.execute("""
    SELECT
        project_name,
        activity_type,
        SUM(total_hours) as hours,
        SUM(total_days) as days
    FROM working_times_summary
    GROUP BY project_name, activity_type
    ORDER BY hours DESC
    LIMIT 10
""").fetchall()

print("\n" + "-"*60)
print("TOP 10 PROJECT-ACTIVITY COMBINATIONS")
print("-"*60)
for project, activity, hours, days in project_activity_combo:
    # Share of the overall total; 0 when the total is missing or zero so the
    # report does not abort with a division error.
    percent = (hours / total_hours) * 100 if total_hours else 0
    print(f"{project} - {activity}: {format_hours(hours)} ({format_days(days)}, {percent:.1f}%)")

# Closing banner, repeating the analysis timestamp taken earlier in the script
footer_rule = "=" * 60
print(f"\n{footer_rule}")
print(f"END OF ANALYSIS - Generated at {analysis_timestamp}")
print(footer_rule)

# Release the database connection
con.close()