Upload files to "/"
This commit is contained in:
commit
fb415260ba
4
Links.txt
Normal file
4
Links.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group=prj_ppix
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
20
cronScript.sh
Normal file
20
cronScript.sh
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash
#this file is the script

# Cron will trigger once a week

#You will find the crontab in the VM where the Containers will run
# in /tmp/crontab...
# remember to put all full paths in crontab and make it runnable with chmod +x

# Stop on the first failing command and on unset variables, so a broken
# build never starts (stale) containers.
set -eu

# Cron starts jobs with an unpredictable working directory; change into
# the directory containing this script so that docker compose finds
# docker-compose.yml and the helper script via relative paths.
cd "$(dirname "$0")"

#build and start all containers
docker compose build --no-cache
docker compose up -d

#docker stop of containers that only run for containers that already stopped
#e.g. dobby and selenium
bash statusAndStopScript.sh
|
||||||
|
|
||||||
96
docker-compose.yml
Normal file
96
docker-compose.yml
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
|
||||||
|
version: '3.8'

services:
  #Use postgres/example user/password credentials
  #container which contains the databank and its entries
  db:
    container_name: pg_container_test_vimes
    image: postgres
    restart: always
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    environment:
      POSTGRES_USER: root
      POSTGRES_PASSWORD: root
      POSTGRES_DB: test_db_vimes
    ports:
      - "5432:5432"

  #pg admin can display the databank entries in a browser
  # pg_admin:
  #   container_name: pgadmin4_container_test_vimes
  #   image: dpage/pgadmin4
  #   restart: always
  #   environment:
  #     PGADMIN_DEFAULT_EMAIL: admin@admin.com #somehow can not change those
  #     PGADMIN_DEFAULT_PASSWORD: root
  #   ports:
  #     - "5050:80"

  #MAIN controller for whole program
  py_app_main: #container called pyApp
    container_name: pyapp_main_test_vimes
    build: #build it with
      context: . #either a path to a directory containing a Dockerfile, or a url to a git repository.
      dockerfile: ./pyDockerfile #path to the Dockerfile
      #somehow need ./ so that Vimes uses the same dir as path
    command: python main.py --init #the command which is done
    depends_on: #start the database container first (main.py connects to it)
      - db
    logging: #build a log file of the terminal output in /var/lib/docker/containers/[container-id]/[container-id]-json.log
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    ports:
      - 3000:5432
    volumes:
      - personsNames:/var/lib/data #acceses the volume "personsNames" with its path "/var/lib/data"

  #container which extracts all data from Dobby using selenium
  py_extract_dobby:
    container_name: py_extract_dobby_test_vimes
    build:
      context: .
      dockerfile: ./pyDockerfile #dockerfile with python image installing the required packages
    depends_on: #needs the selenium container to drive the browser
      - selenium_browser
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    command: python extractDobby.py
    ports:
      - 6666:4444
    volumes:
      - personsNames:/var/lib/data #saves all names at crossnative in a volume
      # NOTE(review): the additional host bind "/data:/var/lib/data" was
      # removed — two mounts cannot share the same container target path
      # (compose rejects duplicate mount points). If the data should also
      # land on the host, bind /data to a DIFFERENT container path.
      #for now mount it to /data on the host machine, but maybe somewhere else later

  #just contains the selenium webbrowser so other containers can use it
  selenium_browser:
    container_name: sel_browser_test_vimes
    image: selenium/standalone-chrome:4.10.0-20230607
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    shm_size: '2gb' #size of shared memory, made it much bigger, so that the tab will not crash
    ports:
      - 4444:4444

volumes:
  personsNames: #build a volume called "my-volume"
|
||||||
63
extractDobby.py
Normal file
63
extractDobby.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
#maybe need to import selenium here as well since selenium-webbrowser container connects to here
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.common.exceptions import NoSuchElementException
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
#import extracting Dobby databank
|
||||||
|
import source.Dobby.Dobby as Dobby
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
get all Dobby data
|
||||||
|
'''
|
||||||
|
def main():
    """Scrape all PPI-X group members and their phone numbers from Dobby
    and persist the results to the shared volume.

    Writes (paths are the mount point of the "personsNames" volume):
        /var/lib/data/fullDobby.csv     kuerzel/name/mobile as CSV (with header)
        /var/lib/data/personsDobby.csv  raw kuerzel/name pairs
        /var/lib/data/test.txt          small marker file proving the volume is writable
    """
    #Dobby.testDriver()
    driver = Dobby.getDriver()
    try:
        ppiXGroupNames = Dobby.findGroupNames(driver)
        # marker write: verifies the shared volume is mounted and writable
        np.savetxt("/var/lib/data/test.txt", [1, 2], delimiter=",", fmt='%s')
        print(ppiXGroupNames)

        #newDriver = Dobby.getDriver()
        persons = Dobby.getPersons(driver, ppiXGroupNames)
        print(persons)

        mitarbeiterAnzahl = len(persons)
        print("Anzahl der Mitarbeiter: " + str(mitarbeiterAnzahl))
        personsArray = np.array(list(persons))
        #print(personsArray)
        personsArrayTrans = personsArray.transpose()
        #print(personsArrayTrans)

        allNumbers = Dobby.getDobbyPersonData(driver, personsArray)
        print(allNumbers)
        allNumbersTransposed = allNumbers.transpose()
    finally:
        # quit() (not close()) ends the whole remote WebDriver session, so the
        # standalone-chrome container frees its slot even when scraping fails;
        # close() would only close the current window and leak the session.
        driver.quit()

    dictDobby = {'kuerzel': personsArrayTrans[0], 'name': personsArrayTrans[1], 'mobile': allNumbersTransposed[0]}
    print(dictDobby)
    dataframeDobby = pd.DataFrame.from_dict(dictDobby)

    dataframeDobby.to_csv('/var/lib/data/fullDobby.csv', index=False)
    np.savetxt("/var/lib/data/personsDobby.csv", personsArray, delimiter=",", fmt='%s')
    #TODO: also have to save it as csv file
|
||||||
|
|
||||||
|
|
||||||
|
# Run the Dobby extraction only when executed as a script (not on import).
if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
132
main.py
Normal file
132
main.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
import sqlalchemy as db #somehow pyodbc does not work in container
|
||||||
|
import numpy as np
|
||||||
|
from sqlalchemy.sql import text
|
||||||
|
from sqlalchemy_utils import database_exists, create_database
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
import source.DataBank.createDatabank as createDB
|
||||||
|
import source.DataBank.fillDatabank as fillDB
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
read data of Dobby from volume
|
||||||
|
'''
|
||||||
|
def loadDobbyData(path="/var/lib/data/fullDobby.csv"):
    """Load the employee data scraped from Dobby out of the shared volume.

    Args:
        path: CSV file written by extractDobby.py via pandas ``to_csv``
              (columns kuerzel,name,mobile, WITH a header row).

    Returns:
        (n, 7) numpy array of strings per employee:
        [Kuerzel, Vorname, Nachname, Standort, Eintrittsdatum, E-Mail, Mobil].
        Columns 3-5 are placeholder values until real data is scraped.
    """
    # to_csv writes a header line, so skip it (skiprows=1); ndmin=2 keeps the
    # result 2-D even for a single employee; dtype='U60' loads strings,
    # important so numbers/names are not coerced to floats.
    personsArray = np.loadtxt(path, delimiter=",", dtype='U60',
                              skiprows=1, ndmin=2)
    print(personsArray)

    # need to establish this array as an array of strings, to be able to
    # save strings and not floats there
    Mitarbeiter = np.zeros((len(personsArray), 7), dtype='U60')
    for i in range(len(personsArray)):
        Mitarbeiter[i, 0] = personsArray[i, 0]  # Kuerzel

        nameParts = personsArray[i, 1].split()  # split the full name
        nachName = nameParts[-1] if nameParts else ''  # last word is last name
        vorName = " ".join(nameParts[:-1])  # all words before are the first name

        Mitarbeiter[i, 1] = vorName  # Vorname
        Mitarbeiter[i, 2] = nachName  # Nachname

        # placeholders until the real values are scraped
        Mitarbeiter[i, 3] = 'hh'
        Mitarbeiter[i, 4] = '2022-01-01'
        Mitarbeiter[i, 5] = 'mail@example.com'
        Mitarbeiter[i, 6] = personsArray[i, 2]  # Mobilnummer
    print(Mitarbeiter)

    return Mitarbeiter
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main(db_user, db_pass, db_host, db_port, db_name):
    """Create (with --init) and fill the Mitarbeiter table in Postgres.

    Args:
        db_user, db_pass: database credentials
        db_host, db_port: where the Postgres container is reachable
        db_name: database to connect to (created if missing)
    """
    '''
    create argparse instance: --init if we want to create tables
    '''
    parser = argparse.ArgumentParser()
    #stores true if we start like: python main.py --init
    parser.add_argument('-i', '--init', action='store_true', default=False, dest='boolean_init')
    #name of argument --init is boolean_init
    init = parser.parse_args().boolean_init
    print(init)

    '''
    connect to dataBank
    '''
    #build our engine to connect and execute in our databank
    db_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_user, db_pass, db_host, db_port, db_name)
    engine = db.create_engine(db_string, echo_pool=True)
    #build the database itself, if it does not exist already
    if not database_exists(engine.url):
        print('db did not exist before')
        create_database(engine.url)

    '''
    wait some time, so all data from the other containers can be scrapped
    and saved
    therefore the main.py can load all data and save it in DB
    '''
    #TODO: need dependency and NOT fixed sleep time
    time.sleep(600)

    '''
    load Data scraped from Dobby
    '''
    Mitarbeiter = loadDobbyData()
    mitarbeiterAnzahl = len(Mitarbeiter)

    '''
    Now all SQL commands
    '''
    with engine.connect() as connection:
        if init:
            createDB.createMitarbeiter(connection)
            print('create')

        # insert row by row; the try/except now sits INSIDE the loop so one
        # failing insert (e.g. a row that already exists) no longer aborts
        # inserting all remaining employees, and we catch Exception instead
        # of a bare except (which would also swallow KeyboardInterrupt)
        for i in range(mitarbeiterAnzahl):
            try:
                fillDB.inputMitarbeiter(connection, Mitarbeiter[i])
            except Exception:
                print("{} was filled in allready".format(Mitarbeiter[i][0]))
        #createDB.dropMitarbeiter(connection)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Connection settings for the Postgres container.
    # NOTE: db_host and db_name may need adjusting depending on your machine.
    # TODO: move these into docker compose environment variables.
    connection_settings = {
        'db_user': 'root',
        'db_pass': 'root',
        # container name doubles as the hostname on the compose network,
        # but resolution is not always working
        'db_host': 'pg_container_test_vimes',
        'db_port': '5432',
        'db_name': 'test_db_vimes',
    }

    main(**connection_settings)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user