From fb415260ba6c76576194a3fbfe233fea25cce64d Mon Sep 17 00:00:00 2001
From: lasse
Date: Tue, 1 Apr 2025 08:45:01 +0000
Subject: [PATCH] Upload files to "/"

---
 Links.txt          |   4 ++
 cronScript.sh      |  20 +++++++++
 docker-compose.yml |  96 ++++++++++++++++++++++++++++++++++++++++++
 extractDobby.py    |  51 +++++++++++++++++++++++
 main.py            | 102 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 273 insertions(+)
 create mode 100644 Links.txt
 create mode 100644 cronScript.sh
 create mode 100644 docker-compose.yml
 create mode 100644 extractDobby.py
 create mode 100644 main.py

diff --git a/Links.txt b/Links.txt
new file mode 100644
index 0000000..8e263fb
--- /dev/null
+++ b/Links.txt
@@ -0,0 +1,4 @@
+https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group=prj_ppix
+
+
+
diff --git a/cronScript.sh b/cronScript.sh
new file mode 100644
index 0000000..7f3123a
--- /dev/null
+++ b/cronScript.sh
@@ -0,0 +1,20 @@
+#this file is the script
+# Cron will trigger once a week
+
+#You will find the crontab in the VM where the Containers will run
+# in /tmp/crontab...
+# remember to put all full paths in crontab and make it runnable with chmod +x
+
+
+
+
+#build and start all containers
+docker compose build --no-cache
+docker compose up -d
+
+
+
+#docker stop of containers that only run for containers that already stopped
+#e.g. dobby and selenium
+bash statusAndStopScript.sh
+
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..5c79e59
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,96 @@
+
+version: '3.8'
+
+services:
+#Use postgres/example user/password credentials
+#container which contains the databank and its entries
+  db:
+    container_name: pg_container_test_vimes
+    image: postgres
+    restart: always
+    logging:
+      driver: json-file
+      options:
+        max-size: 10m
+        max-file: "3"
+    environment:
+      POSTGRES_USER: root
+      POSTGRES_PASSWORD: root
+      POSTGRES_DB: test_db_vimes
+    ports:
+      - "5432:5432"
+
+
+#pg admin can display the databank entries in a browser
+#  pg_admin:
+#    container_name: pgadmin4_container_test_vimes
+#    image: dpage/pgadmin4
+#    restart: always
+#    environment:
+#      PGADMIN_DEFAULT_EMAIL: admin@admin.com #somehow can not change those
+#      PGADMIN_DEFAULT_PASSWORD: root
+#    ports:
+#      - "5050:80"
+
+
+#MAIN controller for whole program
+  py_app_main: #container called pyApp
+    container_name: pyapp_main_test_vimes
+    build: #build it with
+      context: . #either a path to a directory containing a Dockerfile, or a url to a git repository.
+      dockerfile: ./pyDockerfile #path to the Dockerfile
+                                 #somehow need ./ so that Vimes uses the same dir as path
+    command: python main.py --init #the command which is done
+    logging: #build a log file of the terminal output in /var/lib/docker/containers/[container-id]/[container-id]-json.log
+      driver: json-file
+      options:
+        max-size: 10m
+        max-file: "3"
+    ports:
+      - 3000:5432
+    volumes:
+      - personsNames:/var/lib/data #accesses the volume "personsNames" with its path "/var/lib/data"
+
+
+
+
+#container which extracts all data from Dobby using selenium
+  py_extract_dobby:
+    container_name: py_extract_dobby_test_vimes
+    build:
+      context: .
+      dockerfile: ./pyDockerfile #dockerfile with python image installing the required packages
+    logging:
+      driver: json-file
+      options:
+        max-size: 10m
+        max-file: "3"
+    command: python extractDobby.py
+    ports:
+      - 6666:4444
+    volumes:
+      - personsNames:/var/lib/data #saves all names at crossnative in a volume
+      - "/data:/var/lib/data" #can save it in host, therefore use hostpath:containerpath => this is a mount, not a volume
+      #for now mount it to /data on the host machine, but maybe somewhere else later
+
+
+
+#just contains the selenium webbrowser so other containers can use it
+  selenium_browser:
+    container_name: sel_browser_test_vimes
+    image: selenium/standalone-chrome:4.10.0-20230607
+    restart: unless-stopped
+    logging:
+      driver: json-file
+      options:
+        max-size: 10m
+        max-file: "3"
+    shm_size: '2gb' #size of shared memory, made it much bigger, so that the tab will not crash
+    ports:
+      - 4444:4444
+
+
+
+
+volumes:
+  personsNames: #declares the named volume "personsNames" used by the python containers
diff --git a/extractDobby.py b/extractDobby.py
new file mode 100644
index 0000000..bfe638a
--- /dev/null
+++ b/extractDobby.py
@@ -0,0 +1,51 @@
+"""Scrape all employee data from Dobby and save it in the shared volume."""
+#maybe need to import selenium here as well since selenium-webbrowser container connects to here
+from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
+import numpy as np
+import pandas as pd
+
+#import extracting Dobby databank
+import source.Dobby.Dobby as Dobby
+
+
+def main():
+    """Get all Dobby data and save it as CSV files in /var/lib/data.
+
+    fullDobby.csv gets the columns kuerzel, name and mobile and is the file
+    main.py loads; personsDobby.csv gets the persons array as returned by
+    Dobby.getPersons.
+    """
+    #Dobby.testDriver()
+    driver = Dobby.getDriver()
+    try:
+        ppiXGroupNames = Dobby.findGroupNames(driver)
+        #NOTE(review): looks like a leftover write test for the volume - confirm
+        np.savetxt("/var/lib/data/test.txt", [1, 2], delimiter=",", fmt='%s')
+        print(ppiXGroupNames)
+
+        persons = Dobby.getPersons(driver, ppiXGroupNames)
+        print(persons)
+
+        print("Anzahl der Mitarbeiter: " + str(len(persons)))
+        personsArray = np.array(list(persons))
+        personsArrayTrans = personsArray.transpose()
+
+        allNumbers = Dobby.getDobbyPersonData(driver, personsArray)
+        print(allNumbers)
+        allNumbersTransposed = allNumbers.transpose()
+    finally:
+        #always end the browser session, also when the scraping failed, so
+        #that no session is left open (presumably in the selenium container)
+        driver.quit()
+
+    dictDobby = {'kuerzel': personsArrayTrans[0], 'name': personsArrayTrans[1], 'mobile': allNumbersTransposed[0]}
+    print(dictDobby)
+    dataframeDobby = pd.DataFrame.from_dict(dictDobby)
+
+    dataframeDobby.to_csv('/var/lib/data/fullDobby.csv', index=False)
+    np.savetxt("/var/lib/data/personsDobby.csv", personsArray, delimiter=",", fmt='%s')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..20fbb6f
--- /dev/null
+++ b/main.py
@@ -0,0 +1,102 @@
+"""Load the employee data scraped from Dobby and save it in the databank."""
+import argparse
+import os
+import time
+
+import numpy as np
+import sqlalchemy as db #somehow pyodbc does not work in container
+from sqlalchemy.sql import text
+from sqlalchemy_utils import database_exists, create_database
+
+import source.DataBank.createDatabank as createDB
+import source.DataBank.fillDatabank as fillDB
+
+
+def loadDobbyData(path="/var/lib/data/fullDobby.csv"):
+    """Read the data of Dobby from the volume and build the Mitarbeiter rows.
+
+    extractDobby.py writes the file with pandas, so its first line is the
+    header (kuerzel,name,mobile) and not a person; it is skipped here.
+
+    Returns an array of strings with one row per person and the columns
+    Kuerzel, Vorname, Nachname, 'hh', '2022-01-01', 'mail@example.com' and
+    mobile.
+    """
+    #important that we load strings; ndmin=2 keeps a file with only one
+    #person two-dimensional, so that the row/column indexing below works
+    personsArray = np.loadtxt(path, delimiter=",", dtype='U60', skiprows=1, ndmin=2)
+    print(personsArray)
+
+    #need to establish this array as array of strings,
+    #to be able to save strings and not floats there
+    Mitarbeiter = np.zeros((len(personsArray), 7), dtype='U60')
+    for i, person in enumerate(personsArray):
+        nameParts = person[1].split() #split the full name
+
+        Mitarbeiter[i, 0] = person[0] #Kuerzel
+        Mitarbeiter[i, 1] = " ".join(nameParts[:-1]) #all words before the last one
+        Mitarbeiter[i, 2] = nameParts[-1] if nameParts else '' #last word is last name
+        Mitarbeiter[i, 3] = 'hh'
+        Mitarbeiter[i, 4] = '2022-01-01'
+        Mitarbeiter[i, 5] = 'mail@example.com'
+        Mitarbeiter[i, 6] = person[2]
+    print(Mitarbeiter)
+
+    return Mitarbeiter
+
+
+def main(db_user, db_pass, db_host, db_port, db_name):
+    """Save the scraped Dobby data in the Mitarbeiter table of the databank.
+
+    The five arguments are put into the postgresql connection URL. Started
+    like "python main.py --init" the table is created first.
+    """
+    parser = argparse.ArgumentParser()
+    #stores true if we start like: python main.py --init
+    parser.add_argument('-i', '--init', action='store_true', default=False, dest='boolean_init')
+    init = parser.parse_args().boolean_init
+    print(init)
+
+    #build our engine to connect and execute in our databank
+    db_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_user, db_pass, db_host, db_port, db_name)
+    engine = db.create_engine(db_string, echo_pool=True)
+    #build the database itself, if it does not exist already
+    if not database_exists(engine.url):
+        print('db did not exist before')
+        create_database(engine.url)
+
+    #wait some time, so all data from the other containers can be scraped
+    #and saved, therefore the main.py can load all data and save it in DB
+    #TODO: need dependency and NOT fixed sleep time
+    time.sleep(600)
+
+    Mitarbeiter = loadDobbyData()
+
+    with engine.connect() as connection:
+        if init:
+            createDB.createMitarbeiter(connection)
+            print('create')
+
+        #the filling stops at the first row that fails, as before; the error
+        #is printed as well, because a failure is not always a duplicate, and
+        #Ctrl+C is no longer swallowed like with the bare except
+        for person in Mitarbeiter:
+            try:
+                fillDB.inputMitarbeiter(connection, person)
+            except Exception as err:
+                print("{} could not be filled in, maybe it was filled in already: {}".format(person[0], err))
+                break
+        #createDB.dropMitarbeiter(connection)
+
+
+if __name__ == '__main__':
+    #values to connect to the databank; the defaults are the values of the
+    #db service in docker-compose.yml, environment variables override them
+    db_user = os.environ.get('DB_USER', 'root')
+    db_pass = os.environ.get('DB_PASS', 'root')
+    #the container name of the db service is used as host name
+    db_host = os.environ.get('DB_HOST', 'pg_container_test_vimes')
+    db_port = os.environ.get('DB_PORT', '5432')
+    db_name = os.environ.get('DB_NAME', 'test_db_vimes')
+
+    main(db_user, db_pass, db_host, db_port, db_name)