import pandas as pd from datetime import datetime import xlsxwriter fileLocation = '/Users/lawi/Nextcloud/PPI/Zeiterfassung_2023.xlsx' year = 2023 #month = 7 week = 33 # constants for Excel calculation and format firstDate = datetime(1899, 12, 30) totalSeconds = 86400 dateFormat='dd.mm.yyyy' timeFormat='hh:mm' # Read Excel files, time columns as string erfassung = pd.read_excel(fileLocation, sheet_name='Erfassung', dtype={'Beginn': str, 'Ende': str}) projekte = pd.read_excel(fileLocation, sheet_name='Projekte') # add week numbers erfassung['Woche'] = erfassung['Datum'].dt.isocalendar().week # Filter input data on specified year and month erfassung = erfassung[erfassung['Datum'].dt.year == year] #erfassung = erfassung[erfassung['Datum'].dt.month == month] erfassung = erfassung[erfassung['Woche'] == week] erfassung = erfassung[erfassung['Projekt'] != 'Pause'] # Delte rows where no start or end time is given erfassung.dropna(subset=['Beginn', 'Ende'], inplace=True) # transform time values (HH:MM:SS) to seconds for calculation erfassung['Beginn'] = erfassung['Beginn'].str.split(':').apply(lambda x: int(x[0])*3600 + int(x[1])*60 + int(x[2])) erfassung['Ende'] = erfassung['Ende'].str.split(':').apply(lambda x: int(x[0])*3600 + int(x[1])*60 + int(x[2])) erfassung['Dauer'] = erfassung['Ende']-erfassung['Beginn'] projekte = projekte[['Projekt - Art', 'Projekt-PSP-ID', 'PSP-ID', 'Leistungsart']] # Start time per day promato = pd.DataFrame(erfassung.groupby(['Datum'])['Beginn'].min()) # End time per day promato = promato.join(pd.DataFrame(erfassung.groupby(['Datum'])['Ende'].max())) # total time per day incl breaks promato = promato.join(pd.DataFrame(erfassung.groupby(['Datum'])['Dauer'].sum())) # calculate break time per day promato['Pause'] = promato['Ende'] - promato['Beginn'] - promato['Dauer'] promato = promato[['Beginn', 'Ende', 'Pause']] # dummy column for "Abwesenheit" promato['Abwesenheit'] = '' # time per project (PSP-ID + Leistungsart) per day promato = promato.merge(pd.DataFrame(erfassung.groupby(['Datum', 'Projekt'], as_index=False)['Dauer'].sum()), how='left', left_on='Datum', right_on='Datum') # join project information to time data promato = promato.merge(projekte, left_on='Projekt', right_on='Projekt - Art', how='left') # join comments from excel to time data (one comment per day, PSP-ID and Leistungsart) promato = promato.merge(pd.DataFrame(erfassung.groupby(['Datum', 'Projekt'], as_index=False)['Kommentar'].apply(lambda x: ', '.join(set(x.dropna().astype(str))))), on=['Datum', 'Projekt'], how='left') promato.rename(columns={'Beginn': 'Von', 'Ende': 'Bis', 'Dauer': 'Zeit'}, inplace=True) promato = promato[['Datum', 'Von', 'Bis', 'Pause', 'Abwesenheit', 'Projekt-PSP-ID', 'PSP-ID', 'Leistungsart', 'Zeit', 'Kommentar']] # date and time columns to strings #promato['Datum'] = promato['Datum'].dt.strftime('%d.%m.%Y') #promato['Von'] = pd.to_datetime(promato['Von'], unit='s').dt.strftime('%H:%M:%S') #promato['Bis'] = pd.to_datetime(promato['Bis'], unit='s').dt.strftime('%H:%M:%S') #promato['Pause'] = pd.to_datetime(promato['Pause'], unit='s').dt.strftime('%H:%M:%S') #promato['Zeit'] = pd.to_datetime(promato['Zeit'], unit='s').dt.strftime('%H:%M:%S') # date and time columns to date / time #promato['Datum'] = promato['Datum'].dt.date #promato['Von'] = pd.to_datetime(promato['Von'], unit='s').dt.time #promato['Bis'] = pd.to_datetime(promato['Bis'], unit='s').dt.time #promato['Pause'] = pd.to_datetime(promato['Pause'], unit='s').dt.time #promato['Zeit'] = pd.to_datetime(promato['Zeit'], unit='s').dt.time # transfrom to excel date / time format promato['Datum'] = (promato['Datum'] - firstDate).dt.days promato['Von'] = promato['Von'] / totalSeconds promato['Bis'] = promato['Bis'] / totalSeconds promato['Pause'] = promato['Pause'] / totalSeconds promato['Zeit'] = promato['Zeit'] / totalSeconds #promato.to_excel('/Users/lawi/Nextcloud/PPI/Promato_Upload_'+str(year)+'_'+str('0'+str(month))[-2:]+'.xlsx', sheet_name='Import', index=False) #with xlsxwriter.Workbook('/Users/lawi/Nextcloud/PPI/Promato_Upload_'+str(year)+'_'+str('0'+str(month))[-2:]+'.xlsx') as workbook: with xlsxwriter.Workbook('/Users/lawi/Nextcloud/PPI/Promato_Upload_'+str(year)+'_W'+str('0'+str(week))[-2:]+'.xlsx') as workbook: worksheet = workbook.add_worksheet('Import') for i in range(promato.shape[1]): # write column header worksheet.write(0, i, promato.columns[i]) # set formating for Excel if(promato.columns[i] in ['Von', 'Bis', 'Pause', 'Zeit']): excelFormat = workbook.add_format({'num_format': timeFormat}) elif(promato.columns[i] in ['Datum']): excelFormat = workbook.add_format({'num_format': dateFormat}) else: excelFormat = workbook.add_format({'num_format': '@'}) for j in range(promato.shape[0]): # write dataframe data worksheet.write(j+1, i, promato.iloc[j,i], excelFormat)