Upload files to "/"
This commit is contained in:
commit
fb415260ba
4
Links.txt
Normal file
4
Links.txt
Normal file
@ -0,0 +1,4 @@
|
||||
https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group=prj_ppix
|
||||
|
||||
|
||||
|
||||
20
cronScript.sh
Normal file
20
cronScript.sh
Normal file
@ -0,0 +1,20 @@
#!/bin/bash
# Weekly job script; cron triggers it once a week.
#
# You will find the crontab in the VM where the containers run
# (in /tmp/crontab...).
# Remember to put all full paths in the crontab and make this script
# runnable with chmod +x.
# NOTE(review): a shebang was added so the script runs under a defined
# shell when cron executes it directly.

# Build and start all containers.
docker compose build --no-cache
docker compose up -d

# Stop containers that only run to serve containers which already
# stopped (e.g. dobby and selenium).
bash statusAndStopScript.sh
96
docker-compose.yml
Normal file
96
docker-compose.yml
Normal file
@ -0,0 +1,96 @@
# Docker Compose stack for the Vimes test deployment:
# Postgres DB + main controller + Dobby scraper + standalone Selenium browser.
version: '3.8'

services:
  # Container which contains the database and its entries.
  # Uses postgres/example-style user/password credentials.
  db:
    container_name: pg_container_test_vimes
    image: postgres
    restart: always
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    environment:
      POSTGRES_USER: root
      POSTGRES_PASSWORD: root
      POSTGRES_DB: test_db_vimes
    ports:
      - "5432:5432"

  # pgAdmin can display the database entries in a browser (currently disabled).
  # pg_admin:
  #   container_name: pgadmin4_container_test_vimes
  #   image: dpage/pgadmin4
  #   restart: always
  #   environment:
  #     PGADMIN_DEFAULT_EMAIL: admin@admin.com #somehow can not change those
  #     PGADMIN_DEFAULT_PASSWORD: root
  #   ports:
  #     - "5050:80"

  # MAIN controller for the whole program.
  py_app_main:
    container_name: pyapp_main_test_vimes
    build:
      # Either a path to a directory containing a Dockerfile, or a url to a
      # git repository.
      context: .
      # Path to the Dockerfile; the leading ./ is needed so Vimes resolves
      # the same directory.
      dockerfile: ./pyDockerfile
    # Entry command; --init makes main.py create the DB tables.
    command: python main.py --init
    # Terminal output is logged to
    # /var/lib/docker/containers/[container-id]/[container-id]-json.log
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    ports:
      - 3000:5432
    volumes:
      # Accesses the named volume "personsNames" at /var/lib/data.
      - personsNames:/var/lib/data

  # Container which extracts all data from Dobby using Selenium.
  py_extract_dobby:
    container_name: py_extract_dobby_test_vimes
    build:
      context: .
      # Dockerfile with python image installing the required packages.
      dockerfile: ./pyDockerfile
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    command: python extractDobby.py
    ports:
      - 6666:4444
    volumes:
      # Saves all names scraped at crossnative in a named volume.
      - personsNames:/var/lib/data
      # Bind mount (hostpath:containerpath) so data can also land on the
      # host; mounted to /data on the host for now, maybe elsewhere later.
      # NOTE(review): both entries target /var/lib/data inside the
      # container — only one can take effect; confirm which is intended.
      - "/data:/var/lib/data"

  # Just contains the Selenium web browser so other containers can use it.
  selenium_browser:
    container_name: sel_browser_test_vimes
    image: selenium/standalone-chrome:4.10.0-20230607
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    # Enlarged shared memory so the browser tab will not crash.
    shm_size: '2gb'
    ports:
      - 4444:4444

volumes:
  # Named volume shared by py_app_main and py_extract_dobby.
  personsNames:
63
extractDobby.py
Normal file
63
extractDobby.py
Normal file
@ -0,0 +1,63 @@
|
||||
#maybe need to import selenium here as well since selenium-webbrowser container connects to here
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
#import extracting Dobby databank
|
||||
import source.Dobby.Dobby as Dobby
|
||||
|
||||
|
||||
'''
|
||||
get all Dobby data
|
||||
'''
|
||||
def main():
    """Scrape all Dobby data and persist it into the shared volume.

    Side effects: writes /var/lib/data/test.txt, fullDobby.csv and
    personsDobby.csv under /var/lib/data, prints progress to stdout,
    and opens/closes one Selenium driver via the Dobby helper module.
    """
    #Dobby.testDriver()
    browser = Dobby.getDriver()

    group_names = Dobby.findGroupNames(browser)
    # Smoke-test write so the mounted volume is known to be writable.
    np.savetxt("/var/lib/data/test.txt", [1, 2], delimiter=",", fmt='%s')
    print(group_names)

    #newDriver = Dobby.getDriver()
    members = Dobby.getPersons(browser, group_names)
    print(members)

    member_count = len(members)
    print("Anzahl der Mitarbeiter: " + str(member_count))

    member_matrix = np.array(list(members))
    #print(member_matrix)
    member_columns = member_matrix.transpose()
    #print(member_columns)

    phone_numbers = Dobby.getDobbyPersonData(browser, member_matrix)
    print(phone_numbers)
    numbers_by_column = phone_numbers.transpose()

    #TODO: solve that I only use one driver and never close it
    browser.close()

    dobby_records = {
        'kuerzel': member_columns[0],
        'name': member_columns[1],
        'mobile': numbers_by_column[0],
    }
    print(dobby_records)
    dobby_frame = pd.DataFrame.from_dict(dobby_records)

    # Persist the results; main.py later reads them from the shared volume.
    dobby_frame.to_csv('/var/lib/data/fullDobby.csv', index=False)
    np.savetxt("/var/lib/data/personsDobby.csv", member_matrix, delimiter=",", fmt='%s')
    #TODO: also have to save it as csv file
|
||||
|
||||
# Script entry point: run the full Dobby extraction.
if __name__ == '__main__':
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
132
main.py
Normal file
132
main.py
Normal file
@ -0,0 +1,132 @@
|
||||
import sqlalchemy as db #somehow pyodbc does not work in container
|
||||
import numpy as np
|
||||
from sqlalchemy.sql import text
|
||||
from sqlalchemy_utils import database_exists, create_database
|
||||
import argparse
|
||||
import time
|
||||
|
||||
|
||||
import source.DataBank.createDatabank as createDB
|
||||
import source.DataBank.fillDatabank as fillDB
|
||||
|
||||
|
||||
'''
|
||||
read data of Dobby from volume
|
||||
'''
|
||||
def loadDobbyData():
    """Load the Dobby export written by extractDobby.py from the shared volume.

    Reads /var/lib/data/fullDobby.csv (columns: kuerzel, full name, mobile)
    and reshapes it into the 7-column employee layout expected by the
    Mitarbeiter table:

        [Kuerzel, Vorname, Nachname, office, entry date, mail, mobile]

    Columns 3-5 are placeholder values until a real data source exists.

    Returns:
        numpy.ndarray of shape (n, 7), dtype 'U60' (strings, not floats).
    """
    # skiprows=1: the file is produced by DataFrame.to_csv(index=False),
    # which writes a header line ("kuerzel,name,mobile") that must not be
    # treated as an employee row.
    # ndmin=2: keep the array two-dimensional even for a single data row,
    # otherwise np.loadtxt collapses it to 1-D and the [i, j] indexing
    # below breaks.
    # dtype='U60': important that we load strings.
    personsArray = np.loadtxt("/var/lib/data/fullDobby.csv", delimiter=",",
                              dtype='U60', skiprows=1, ndmin=2)
    print(personsArray)

    # Establish the result as an array of strings so string values can be
    # stored (np.zeros would otherwise hold floats).
    Mitarbeiter = np.zeros((len(personsArray), 7), dtype='U60')
    for i in range(len(personsArray)):
        Mitarbeiter[i, 0] = personsArray[i, 0]  # Kuerzel

        # Split the full name: the last word is the last name, everything
        # before it forms the (possibly multi-part) first name.
        nameParts = personsArray[i, 1].split()
        nachName = nameParts[-1] if nameParts else ''
        vorName = " ".join(nameParts[:-1])

        Mitarbeiter[i, 1] = vorName   # Vorname
        Mitarbeiter[i, 2] = nachName  # Nachname

        # Placeholder defaults until the real values are available.
        Mitarbeiter[i, 3] = 'hh'
        Mitarbeiter[i, 4] = '2022-01-01'
        Mitarbeiter[i, 5] = 'mail@example.com'
        Mitarbeiter[i, 6] = personsArray[i, 2]  # mobile number
    print(Mitarbeiter)

    return Mitarbeiter
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def main(db_user, db_pass, db_host, db_port, db_name):
    """Connect to the Postgres database and load the scraped Dobby data.

    Args:
        db_user, db_pass, db_host, db_port, db_name: Postgres connection
            settings used to build the SQLAlchemy URL.

    Also parses the process command line: passing --init (or -i) creates
    the Mitarbeiter table before inserting.

    Side effects: may create the database and table, sleeps 10 minutes to
    wait for the scraper containers, inserts one row per employee, prints
    progress to stdout.
    """
    # argparse: --init stores True when started as `python main.py --init`;
    # the parsed value is available as boolean_init.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--init', action='store_true', default=False, dest='boolean_init')
    init = parser.parse_args().boolean_init
    print(init)

    # Build the engine used to connect to / execute against the database.
    db_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_user, db_pass, db_host, db_port, db_name)
    engine = db.create_engine(db_string, echo_pool=True)
    # Create the database itself if it does not exist already.
    if not database_exists(engine.url):
        print('db did not exist before')
        create_database(engine.url)

    # Wait so the other containers can finish scraping and saving their
    # data; afterwards this process can load it and store it in the DB.
    # TODO: replace the fixed sleep with a real dependency/health check.
    time.sleep(600)

    # Load the data scraped from Dobby out of the shared volume.
    Mitarbeiter = loadDobbyData()
    mitarbeiterAnzahl = len(Mitarbeiter)

    # Now all SQL commands.
    with engine.connect() as connection:
        if init:
            createDB.createMitarbeiter(connection)
            print('create')

        for i in range(mitarbeiterAnzahl):
            # Insert each employee individually: a duplicate row must not
            # abort the remaining inserts. (The old bare `except:` around
            # the whole loop stopped at the first failure and also
            # swallowed KeyboardInterrupt/SystemExit.)
            try:
                fillDB.inputMitarbeiter(connection, Mitarbeiter[i])
            except Exception:
                print("{} was filled in allready".format(Mitarbeiter[i][0]))
        #createDB.dropMitarbeiter(connection)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Connection settings for the database.
    # NOTE: you may have to change db_host and db_name depending on your
    # machine.
    # TODO: move these values into docker-compose environment variables.
    db_user = 'root'
    db_pass = 'root'
    # Should resolve to the postgres container automatically via its
    # container name, but this is not always working.
    db_host = 'pg_container_test_vimes'
    db_port = '5432'
    db_name = 'test_db_vimes'

    main(db_user, db_pass, db_host, db_port, db_name)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user