import_promato_from_excel/zeiterfassung.py

import pandas as pd
from datetime import datetime
import xlsxwriter

fileLocation = '/Users/lawi/Nextcloud/PPI/Zeiterfassung_2023.xlsx'
year = 2023
#month = 7
week = 33

# constants for Excel calculation and format
firstDate = datetime(1899, 12, 30)
totalSeconds = 86400
dateFormat='dd.mm.yyyy'
timeFormat='hh:mm'

# Read Excel files, time columns as string
erfassung = pd.read_excel(fileLocation, sheet_name='Erfassung', dtype={'Beginn': str, 'Ende': str})
projekte = pd.read_excel(fileLocation, sheet_name='Projekte')

# add week numbers
erfassung['Woche'] = erfassung['Datum'].dt.isocalendar().week

# Filter input data on specified year and month
erfassung = erfassung[erfassung['Datum'].dt.year == year]
#erfassung = erfassung[erfassung['Datum'].dt.month == month]
erfassung = erfassung[erfassung['Woche'] == week]
erfassung = erfassung[erfassung['Projekt'] != 'Pause']
# Delte rows where no start or end time is given
erfassung.dropna(subset=['Beginn', 'Ende'], inplace=True)

# transform time values (HH:MM:SS) to seconds for calculation
erfassung['Beginn'] = erfassung['Beginn'].str.split(':').apply(lambda x: int(x[0])*3600 + int(x[1])*60 + int(x[2]))
erfassung['Ende'] = erfassung['Ende'].str.split(':').apply(lambda x: int(x[0])*3600 + int(x[1])*60 + int(x[2]))
erfassung['Dauer'] = erfassung['Ende']-erfassung['Beginn']

projekte = projekte[['Projekt - Art', 'Projekt-PSP-ID', 'PSP-ID', 'Leistungsart']]


# Start time per day
promato = pd.DataFrame(erfassung.groupby(['Datum'])['Beginn'].min())
# End time per day
promato = promato.join(pd.DataFrame(erfassung.groupby(['Datum'])['Ende'].max()))
# total time per day incl breaks
promato = promato.join(pd.DataFrame(erfassung.groupby(['Datum'])['Dauer'].sum()))
# calculate break time per day
promato['Pause'] = promato['Ende'] - promato['Beginn'] - promato['Dauer']
promato = promato[['Beginn', 'Ende', 'Pause']]
# dummy column for "Abwesenheit"
promato['Abwesenheit'] = ''
# time per project (PSP-ID + Leistungsart) per day
promato = promato.merge(pd.DataFrame(erfassung.groupby(['Datum', 'Projekt'], as_index=False)['Dauer'].sum()), how='left', left_on='Datum', right_on='Datum')
# join project information to time data
promato = promato.merge(projekte, left_on='Projekt', right_on='Projekt - Art', how='left')
# join comments from excel to time data (one comment per day, PSP-ID and Leistungsart)
promato = promato.merge(pd.DataFrame(erfassung.groupby(['Datum', 'Projekt'], as_index=False)['Kommentar'].apply(lambda x: ', '.join(set(x.dropna().astype(str))))),
                on=['Datum', 'Projekt'], how='left')
promato.rename(columns={'Beginn': 'Von', 'Ende': 'Bis', 'Dauer': 'Zeit'}, inplace=True)
promato = promato[['Datum', 'Von', 'Bis', 'Pause', 'Abwesenheit', 'Projekt-PSP-ID', 'PSP-ID', 'Leistungsart', 'Zeit', 'Kommentar']]

# date and time columns to strings
#promato['Datum'] = promato['Datum'].dt.strftime('%d.%m.%Y')
#promato['Von'] = pd.to_datetime(promato['Von'], unit='s').dt.strftime('%H:%M:%S')
#promato['Bis'] = pd.to_datetime(promato['Bis'], unit='s').dt.strftime('%H:%M:%S')
#promato['Pause'] = pd.to_datetime(promato['Pause'], unit='s').dt.strftime('%H:%M:%S')
#promato['Zeit'] = pd.to_datetime(promato['Zeit'], unit='s').dt.strftime('%H:%M:%S')

# date and time columns to date / time
#promato['Datum'] = promato['Datum'].dt.date
#promato['Von'] = pd.to_datetime(promato['Von'], unit='s').dt.time
#promato['Bis'] = pd.to_datetime(promato['Bis'], unit='s').dt.time
#promato['Pause'] = pd.to_datetime(promato['Pause'], unit='s').dt.time
#promato['Zeit'] = pd.to_datetime(promato['Zeit'], unit='s').dt.time

# transfrom to excel date / time format
promato['Datum'] = (promato['Datum'] - firstDate).dt.days
promato['Von'] = promato['Von'] / totalSeconds
promato['Bis'] = promato['Bis'] / totalSeconds
promato['Pause'] = promato['Pause'] / totalSeconds
promato['Zeit'] = promato['Zeit'] / totalSeconds

#promato.to_excel('/Users/lawi/Nextcloud/PPI/Promato_Upload_'+str(year)+'_'+str('0'+str(month))[-2:]+'.xlsx', sheet_name='Import', index=False)

#with xlsxwriter.Workbook('/Users/lawi/Nextcloud/PPI/Promato_Upload_'+str(year)+'_'+str('0'+str(month))[-2:]+'.xlsx') as workbook:
with xlsxwriter.Workbook('/Users/lawi/Nextcloud/PPI/Promato_Upload_'+str(year)+'_W'+str('0'+str(week))[-2:]+'.xlsx') as workbook:
    worksheet = workbook.add_worksheet('Import')

    for i in range(promato.shape[1]):
        # write column header
        worksheet.write(0, i, promato.columns[i])

        # set formating for Excel
        if(promato.columns[i] in ['Von', 'Bis', 'Pause', 'Zeit']):
            excelFormat = workbook.add_format({'num_format': timeFormat})
        elif(promato.columns[i] in ['Datum']):
            excelFormat = workbook.add_format({'num_format': dateFormat})
        else:
            excelFormat = workbook.add_format({'num_format': '@'})

        for j in range(promato.shape[0]):
            # write dataframe data
                worksheet.write(j+1, i, promato.iloc[j,i], excelFormat)