Upload files to "/"

lasse 2025-04-01 08:45:01 +00:00
commit fb415260ba
5 changed files with 315 additions and 0 deletions

Links.txt (Normal file, 4 lines)

@@ -0,0 +1,4 @@
https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group=prj_ppix

cronScript.sh (Normal file, 20 lines)

@@ -0,0 +1,20 @@
#!/bin/bash
# This script builds and starts all containers; cron triggers it once a week.
# The crontab lives on the VM where the containers run, in /tmp/crontab...
# Remember to use full paths in the crontab entry and to make this script
# executable with chmod +x.

# Build and start all containers.
docker compose build --no-cache
docker compose up -d

# Stop the containers that keep running only to serve containers that have
# already finished, e.g. dobby and selenium.
bash statusAndStopScript.sh
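
The crontab entry itself is not part of this commit. A minimal sketch of what the weekly trigger could look like, assuming the script is deployed at /opt/vimes/cronScript.sh (a hypothetical path) and runs Mondays at 03:00:

# minute hour day-of-month month day-of-week command
0 3 * * 1 /bin/bash /opt/vimes/cronScript.sh >> /tmp/cronScript.log 2>&1

Full paths matter here because cron runs with a minimal environment, which is what the comment in the script is warning about.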

docker-compose.yml (Normal file, 96 lines)

@@ -0,0 +1,96 @@
version: '3.8'
services:
  # Container that holds the database and its entries.
  # User/password credentials come from the environment below.
  db:
    container_name: pg_container_test_vimes
    image: postgres
    restart: always
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    environment:
      POSTGRES_USER: root
      POSTGRES_PASSWORD: root
      POSTGRES_DB: test_db_vimes
    ports:
      - "5432:5432"
  # pgAdmin can display the database entries in a browser.
  # pg_admin:
  #   container_name: pgadmin4_container_test_vimes
  #   image: dpage/pgadmin4
  #   restart: always
  #   environment:
  #     PGADMIN_DEFAULT_EMAIL: admin@admin.com # somehow these cannot be changed
  #     PGADMIN_DEFAULT_PASSWORD: root
  #   ports:
  #     - "5050:80"
  # Main controller for the whole program.
  py_app_main:
    container_name: pyapp_main_test_vimes
    build:
      context: . # either a path to a directory containing a Dockerfile, or a URL to a git repository
      dockerfile: ./pyDockerfile # path to the Dockerfile; the leading ./ is needed so Vimes resolves it against the same directory
    command: python main.py --init # the command the container runs
    logging: # writes the terminal output to /var/lib/docker/containers/[container-id]/[container-id]-json.log
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    ports:
      - 3000:5432
    volumes:
      - personsNames:/var/lib/data # mounts the volume "personsNames" at /var/lib/data
  # Container that extracts all data from Dobby using Selenium.
  py_extract_dobby:
    container_name: py_extract_dobby_test_vimes
    build:
      context: .
      dockerfile: ./pyDockerfile # Dockerfile with a Python image that installs the required packages
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    command: python extractDobby.py
    ports:
      - 6666:4444
    volumes:
      - personsNames:/var/lib/data # saves all names at crossnative in a volume
      # A bind mount ("/data:/var/lib/data", i.e. hostpath:containerpath) would store the data
      # on the host instead, but it cannot target the same container path as the volume above
      # (Docker rejects duplicate mount points), so it is disabled here. For now /data on the
      # host is the candidate location, but maybe somewhere else later.
  # Just contains the Selenium web browser so other containers can use it.
  selenium_browser:
    container_name: sel_browser_test_vimes
    image: selenium/standalone-chrome:4.10.0-20230607
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    shm_size: '2gb' # shared memory size; raised considerably so the browser tab does not crash
    ports:
      - 4444:4444
volumes:
  personsNames: # defines a named volume called "personsNames"
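
main.py below papers over the startup ordering with a fixed ten-minute sleep and carries a TODO asking for a real dependency. A recent docker compose (which ignores the version key above) can express this ordering directly; a minimal sketch of the relevant fragment, not part of this commit:

  py_app_main:
    depends_on:
      py_extract_dobby:
        condition: service_completed_successfully # start main only after the scraper exits cleanly

service_completed_successfully fits here because py_extract_dobby runs once and exits; no healthcheck is needed.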

extractDobby.py (Normal file, 63 lines)

@@ -0,0 +1,63 @@
# Selenium may need to be imported here as well, since the selenium-webbrowser
# container connects to this one.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import numpy as np
import pandas as pd
# Module that extracts data from the Dobby database.
import source.Dobby.Dobby as Dobby

'''
get all Dobby data
'''
def main():
    #Dobby.testDriver()
    driver = Dobby.getDriver()
    ppiXGroupNames = Dobby.findGroupNames(driver)
    np.savetxt("/var/lib/data/test.txt", [1, 2], delimiter=",", fmt='%s')
    print(ppiXGroupNames)
    #newDriver = Dobby.getDriver()
    persons = Dobby.getPersons(driver, ppiXGroupNames)
    print(persons)
    mitarbeiterAnzahl = len(persons)
    print("Number of employees: " + str(mitarbeiterAnzahl))
    personsArray = np.array(list(persons))
    #print(personsArray)
    personsArrayTrans = personsArray.transpose()
    #print(personsArrayTrans)
    allNumbers = Dobby.getDobbyPersonData(driver, personsArray)
    print(allNumbers)
    allNumbersTransposed = allNumbers.transpose()
    # TODO: refactor so that only one driver is used and it is always shut down
    driver.quit() # quit() ends the whole session; close() would only close the current window
    dictDobby = {'kuerzel': personsArrayTrans[0], 'name': personsArrayTrans[1], 'mobile': allNumbersTransposed[0]}
    print(dictDobby)
    dataframeDobby = pd.DataFrame.from_dict(dictDobby)
    dataframeDobby.to_csv('/var/lib/data/fullDobby.csv', index=False)
    np.savetxt("/var/lib/data/personsDobby.csv", personsArray, delimiter=",", fmt='%s')
    # TODO: also have to save this as a csv file

if __name__ == '__main__':
    main()
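
source.Dobby.Dobby is not included in this commit, so getDriver() is opaque here. A minimal sketch of how it could obtain a driver from the selenium_browser service in docker-compose.yml, assuming Selenium 4; the function body is an assumption, not the author's code:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def getDriver():
    # hypothetical sketch: talk to the standalone-chrome container over the
    # compose network; "selenium_browser" is the service name from the compose file
    options = Options()
    return webdriver.Remote(
        command_executor="http://selenium_browser:4444", # the standalone image also accepts /wd/hub
        options=options,
    )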

main.py (Normal file, 132 lines)

@@ -0,0 +1,132 @@
import sqlalchemy as db #somehow pyodbc does not work in container
import numpy as np
from sqlalchemy.sql import text
from sqlalchemy_utils import database_exists, create_database
import argparse
import time
import source.DataBank.createDatabank as createDB
import source.DataBank.fillDatabank as fillDB
'''
read the Dobby data from the volume
'''
def loadDobbyData():
    # skiprows=1 skips the header row that to_csv wrote; important that we load strings
    personsArray = np.loadtxt("/var/lib/data/fullDobby.csv", delimiter=",", skiprows=1, dtype='U60')
    print(personsArray)
    # Establish this as an array of strings so that strings, not floats, can be stored in it.
    Mitarbeiter = np.zeros((len(personsArray), 7), dtype='U60')
    for i in range(len(personsArray)):
        Mitarbeiter[i,0] = personsArray[i,0] # Kuerzel (short code)
        nameParts = personsArray[i,1].split() # split the full name
        nachName = nameParts[len(nameParts) - 1] # the last word is the last name
        vorName = " ".join(nameParts[:-1]) # every word before it belongs to the first name
        Mitarbeiter[i,1] = vorName # first name
        Mitarbeiter[i,2] = nachName # last name
        Mitarbeiter[i,3] = 'hh'
        Mitarbeiter[i,4] = '2022-01-01'
        Mitarbeiter[i,5] = 'mail@example.com'
        Mitarbeiter[i,6] = personsArray[i,2]
    print(Mitarbeiter)
    return Mitarbeiter
def main(db_user, db_pass, db_host, db_port, db_name):
    '''
    create argparse instance: --init if we want to create tables
    '''
    parser = argparse.ArgumentParser()
    # stores True if we start like: python main.py --init
    parser.add_argument('-i', '--init', action='store_true', default=False, dest='boolean_init')
    # the value of the --init argument is stored as boolean_init
    init = parser.parse_args().boolean_init
    print(init)
    '''
    connect to the database
    '''
    # build the engine used to connect to and execute against our database
    db_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_user, db_pass, db_host, db_port, db_name)
    engine = db.create_engine(db_string, echo_pool=True)
    # create the database itself, if it does not exist already
    if not database_exists(engine.url):
        print('db did not exist before')
        create_database(engine.url)
    '''
    wait some time, so that the other containers can scrape and save all
    their data; afterwards main.py can load it and store it in the DB
    '''
    # TODO: use a real dependency instead of a fixed sleep time
    time.sleep(600)
    '''
    load the data scraped from Dobby
    '''
    Mitarbeiter = loadDobbyData()
    mitarbeiterAnzahl = len(Mitarbeiter)
    '''
    now all the SQL commands
    '''
    with engine.connect() as connection:
        if init:
            createDB.createMitarbeiter(connection)
            print('create')
        # the try sits inside the loop so that one duplicate row
        # does not abort the remaining inserts
        for i in range(mitarbeiterAnzahl):
            try:
                fillDB.inputMitarbeiter(connection, Mitarbeiter[i])
            except Exception:
                print("{} was already filled in".format(Mitarbeiter[i][0]))
        #createDB.dropMitarbeiter(connection)
if __name__ == '__main__':
    '''
    set the values used to connect to the database
    '''
    # NOTE: you may have to change db_host and db_name depending on your machine
    # TODO: pass these in as variables via docker compose
    db_user = 'root'
    db_pass = 'root'
    # this should at least pick up the postgres container name automatically,
    # but that is not always working
    db_host = 'pg_container_test_vimes'
    db_port = '5432'
    db_name = 'test_db_vimes'
    main(db_user, db_pass, db_host, db_port, db_name)
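
fillDB.inputMitarbeiter is also not part of this commit; the try/except in main() implies that re-inserting an existing employee raises. With PostgreSQL the insert can be made idempotent instead; a minimal sketch, assuming a Mitarbeiter table with kuerzel as its primary key (all table and column names are assumptions):

from sqlalchemy.sql import text

def inputMitarbeiter(connection, row):
    # hypothetical sketch: ON CONFLICT DO NOTHING makes re-runs idempotent,
    # so already-inserted employees are skipped instead of raising
    connection.execute(
        text(
            "INSERT INTO Mitarbeiter (kuerzel, vorname, nachname, standort, eintritt, mail, mobile) "
            "VALUES (:kuerzel, :vorname, :nachname, :standort, :eintritt, :mail, :mobile) "
            "ON CONFLICT (kuerzel) DO NOTHING"
        ),
        {"kuerzel": row[0], "vorname": row[1], "nachname": row[2], "standort": row[3],
         "eintritt": row[4], "mail": row[5], "mobile": row[6]},
    )
    connection.commit() # needed on SQLAlchemy 1.4+ "future" connections; older versions autocommit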