Upload files to "/"

lasse 2025-04-01 08:45:01 +00:00
commit fb415260ba
5 changed files with 315 additions and 0 deletions

Links.txt (Normal file, 4 lines)

@@ -0,0 +1,4 @@
https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group=prj_ppix

cronScript.sh (Normal file, 20 lines)

@@ -0,0 +1,20 @@
#!/bin/bash
# This script builds and starts all containers; cron triggers it once a week.
# The crontab lives on the VM where the containers run, in /tmp/crontab...
# Remember to use full paths in the crontab entry and to make this script
# executable with chmod +x.

# Build and start all containers.
docker compose build --no-cache
docker compose up -d

# Stop the containers that keep running only to serve containers that have
# already finished, e.g. dobby and selenium.
bash statusAndStopScript.sh
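
The crontab entry itself is not part of this commit. A minimal sketch of what the weekly trigger could look like, assuming the script is deployed at /opt/vimes/cronScript.sh (a hypothetical path) and runs Mondays at 03:00:

# minute hour day-of-month month day-of-week command
0 3 * * 1 /bin/bash /opt/vimes/cronScript.sh >> /tmp/cronScript.log 2>&1

Full paths matter here because cron runs with a minimal environment, which is what the comment in the script is warning about.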

docker-compose.yml (Normal file, 96 lines)

@@ -0,0 +1,96 @@
version: '3.8'
services:
  # Container that holds the database and its entries.
  # User/password credentials come from the environment below.
  db:
    container_name: pg_container_test_vimes
    image: postgres
    restart: always
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    environment:
      POSTGRES_USER: root
      POSTGRES_PASSWORD: root
      POSTGRES_DB: test_db_vimes
    ports:
      - "5432:5432"
  # pgAdmin can display the database entries in a browser.
  # pg_admin:
  #   container_name: pgadmin4_container_test_vimes
  #   image: dpage/pgadmin4
  #   restart: always
  #   environment:
  #     PGADMIN_DEFAULT_EMAIL: admin@admin.com # somehow these cannot be changed
  #     PGADMIN_DEFAULT_PASSWORD: root
  #   ports:
  #     - "5050:80"
  # Main controller for the whole program.
  py_app_main:
    container_name: pyapp_main_test_vimes
    build:
      context: . # either a path to a directory containing a Dockerfile, or a URL to a git repository
      dockerfile: ./pyDockerfile # path to the Dockerfile; the leading ./ is needed so Vimes resolves it against the same directory
    command: python main.py --init # the command the container runs
    logging: # writes the terminal output to /var/lib/docker/containers/[container-id]/[container-id]-json.log
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    ports:
      - 3000:5432
    volumes:
      - personsNames:/var/lib/data # mounts the volume "personsNames" at /var/lib/data
  # Container that extracts all data from Dobby using Selenium.
  py_extract_dobby:
    container_name: py_extract_dobby_test_vimes
    build:
      context: .
      dockerfile: ./pyDockerfile # Dockerfile with a Python image that installs the required packages
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    command: python extractDobby.py
    ports:
      - 6666:4444
    volumes:
      - personsNames:/var/lib/data # saves all names at crossnative in a volume
      # A bind mount ("/data:/var/lib/data", i.e. hostpath:containerpath) would store the data
      # on the host instead, but it cannot target the same container path as the volume above
      # (Docker rejects duplicate mount points), so it is disabled here. For now /data on the
      # host is the candidate location, but maybe somewhere else later.
  # Just contains the Selenium web browser so other containers can use it.
  selenium_browser:
    container_name: sel_browser_test_vimes
    image: selenium/standalone-chrome:4.10.0-20230607
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    shm_size: '2gb' # shared memory size; raised considerably so the browser tab does not crash
    ports:
      - 4444:4444
volumes:
  personsNames: # defines a named volume called "personsNames"
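
main.py below papers over the startup ordering with a fixed ten-minute sleep and carries a TODO asking for a real dependency. A recent docker compose (which ignores the version key above) can express this ordering directly; a minimal sketch of the relevant fragment, not part of this commit:

  py_app_main:
    depends_on:
      py_extract_dobby:
        condition: service_completed_successfully # start main only after the scraper exits cleanly

service_completed_successfully fits here because py_extract_dobby runs once and exits; no healthcheck is needed.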

extractDobby.py (Normal file, 63 lines)

@@ -0,0 +1,63 @@
# Selenium may need to be imported here as well, since the selenium-webbrowser
# container connects to this one.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import numpy as np
import pandas as pd
# Module that extracts data from the Dobby database.
import source.Dobby.Dobby as Dobby

'''
get all Dobby data
'''
def main():
    #Dobby.testDriver()
    driver = Dobby.getDriver()
    ppiXGroupNames = Dobby.findGroupNames(driver)
    np.savetxt("/var/lib/data/test.txt", [1, 2], delimiter=",", fmt='%s')
    print(ppiXGroupNames)
    #newDriver = Dobby.getDriver()
    persons = Dobby.getPersons(driver, ppiXGroupNames)
    print(persons)
    mitarbeiterAnzahl = len(persons)
    print("Number of employees: " + str(mitarbeiterAnzahl))
    personsArray = np.array(list(persons))
    #print(personsArray)
    personsArrayTrans = personsArray.transpose()
    #print(personsArrayTrans)
    allNumbers = Dobby.getDobbyPersonData(driver, personsArray)
    print(allNumbers)
    allNumbersTransposed = allNumbers.transpose()
    # TODO: refactor so that only one driver is used and it is always shut down
    driver.quit() # quit() ends the whole session; close() would only close the current window
    dictDobby = {'kuerzel': personsArrayTrans[0], 'name': personsArrayTrans[1], 'mobile': allNumbersTransposed[0]}
    print(dictDobby)
    dataframeDobby = pd.DataFrame.from_dict(dictDobby)
    dataframeDobby.to_csv('/var/lib/data/fullDobby.csv', index=False)
    np.savetxt("/var/lib/data/personsDobby.csv", personsArray, delimiter=",", fmt='%s')
    # TODO: also have to save this as a csv file

if __name__ == '__main__':
    main()
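
source.Dobby.Dobby is not included in this commit, so getDriver() is opaque here. A minimal sketch of how it could obtain a driver from the selenium_browser service in docker-compose.yml, assuming Selenium 4; the function body is an assumption, not the author's code:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def getDriver():
    # hypothetical sketch: talk to the standalone-chrome container over the
    # compose network; "selenium_browser" is the service name from the compose file
    options = Options()
    return webdriver.Remote(
        command_executor="http://selenium_browser:4444", # the standalone image also accepts /wd/hub
        options=options,
    )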

main.py (Normal file, 132 lines)

@@ -0,0 +1,132 @@
import sqlalchemy as db #somehow pyodbc does not work in container
import numpy as np
from sqlalchemy.sql import text
from sqlalchemy_utils import database_exists, create_database
import argparse
import time
import source.DataBank.createDatabank as createDB
import source.DataBank.fillDatabank as fillDB
'''
read the Dobby data from the volume
'''
def loadDobbyData():
    # skiprows=1 skips the header row that to_csv wrote; important that we load strings
    personsArray = np.loadtxt("/var/lib/data/fullDobby.csv", delimiter=",", skiprows=1, dtype='U60')
    print(personsArray)
    # Establish this as an array of strings so that strings, not floats, can be stored in it.
    Mitarbeiter = np.zeros((len(personsArray), 7), dtype='U60')
    for i in range(len(personsArray)):
        Mitarbeiter[i,0] = personsArray[i,0] # Kuerzel (short code)
        nameParts = personsArray[i,1].split() # split the full name
        nachName = nameParts[len(nameParts) - 1] # the last word is the last name
        vorName = " ".join(nameParts[:-1]) # every word before it belongs to the first name
        Mitarbeiter[i,1] = vorName # first name
        Mitarbeiter[i,2] = nachName # last name
        Mitarbeiter[i,3] = 'hh'
        Mitarbeiter[i,4] = '2022-01-01'
        Mitarbeiter[i,5] = 'mail@example.com'
        Mitarbeiter[i,6] = personsArray[i,2]
    print(Mitarbeiter)
    return Mitarbeiter
def main(db_user, db_pass, db_host, db_port, db_name):
    '''
    create argparse instance: --init if we want to create tables
    '''
    parser = argparse.ArgumentParser()
    # stores True if we start like: python main.py --init
    parser.add_argument('-i', '--init', action='store_true', default=False, dest='boolean_init')
    # the value of the --init argument is stored as boolean_init
    init = parser.parse_args().boolean_init
    print(init)
    '''
    connect to the database
    '''
    # build the engine used to connect to and execute against our database
    db_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_user, db_pass, db_host, db_port, db_name)
    engine = db.create_engine(db_string, echo_pool=True)
    # create the database itself, if it does not exist already
    if not database_exists(engine.url):
        print('db did not exist before')
        create_database(engine.url)
    '''
    wait some time, so that the other containers can scrape and save all
    their data; afterwards main.py can load it and store it in the DB
    '''
    # TODO: use a real dependency instead of a fixed sleep time
    time.sleep(600)
    '''
    load the data scraped from Dobby
    '''
    Mitarbeiter = loadDobbyData()
    mitarbeiterAnzahl = len(Mitarbeiter)
    '''
    now all the SQL commands
    '''
    with engine.connect() as connection:
        if init:
            createDB.createMitarbeiter(connection)
            print('create')
        # the try sits inside the loop so that one duplicate row
        # does not abort the remaining inserts
        for i in range(mitarbeiterAnzahl):
            try:
                fillDB.inputMitarbeiter(connection, Mitarbeiter[i])
            except Exception:
                print("{} was already filled in".format(Mitarbeiter[i][0]))
        #createDB.dropMitarbeiter(connection)
if __name__ == '__main__':
    '''
    set the values used to connect to the database
    '''
    # NOTE: you may have to change db_host and db_name depending on your machine
    # TODO: pass these in as variables via docker compose
    db_user = 'root'
    db_pass = 'root'
    # this should at least pick up the postgres container name automatically,
    # but that is not always working
    db_host = 'pg_container_test_vimes'
    db_port = '5432'
    db_name = 'test_db_vimes'
    main(db_user, db_pass, db_host, db_port, db_name)
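
fillDB.inputMitarbeiter is also not part of this commit; the try/except in main() implies that re-inserting an existing employee raises. With PostgreSQL the insert can be made idempotent instead; a minimal sketch, assuming a Mitarbeiter table with kuerzel as its primary key (all table and column names are assumptions):

from sqlalchemy.sql import text

def inputMitarbeiter(connection, row):
    # hypothetical sketch: ON CONFLICT DO NOTHING makes re-runs idempotent,
    # so already-inserted employees are skipped instead of raising
    connection.execute(
        text(
            "INSERT INTO Mitarbeiter (kuerzel, vorname, nachname, standort, eintritt, mail, mobile) "
            "VALUES (:kuerzel, :vorname, :nachname, :standort, :eintritt, :mail, :mobile) "
            "ON CONFLICT (kuerzel) DO NOTHING"
        ),
        {"kuerzel": row[0], "vorname": row[1], "nachname": row[2], "standort": row[3],
         "eintritt": row[4], "mail": row[5], "mobile": row[6]},
    )
    connection.commit() # needed on SQLAlchemy 1.4+ "future" connections; older versions autocommit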