Upload files to "/"
This commit is contained in:
commit fb415260ba

Links.txt  (4 lines, Normal file)
@@ -0,0 +1,4 @@
https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group=prj_ppix


cronScript.sh  (20 lines, Normal file)
@@ -0,0 +1,20 @@
#This file is the script that cron triggers once a week.

#You will find the crontab on the VM where the containers run,
#   in /tmp/crontab...
#   Remember to use full (absolute) paths in the crontab and to make this script executable with chmod +x.


#Build and start all containers.
docker compose build --no-cache
docker compose up -d


#Stop the containers that only keep running to serve containers which have already stopped,
#   e.g. dobby and selenium.
bash statusAndStopScript.sh
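The crontab entry itself is not part of this commit, only the script it calls. A minimal sketch of what such an entry might look like, assuming the repository is checked out at /opt/vimes (a placeholder path) and the job should run every Monday at 03:00:

# hypothetical crontab entry; the schedule and all paths are placeholders
0 3 * * 1 /bin/bash /opt/vimes/cronScript.sh >> /tmp/cronScript.log 2>&1

Invoking the script through /bin/bash with an absolute path follows the script's own advice to use full paths; chmod +x cronScript.sh is still needed if the script should also be runnable directly.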
							
								
								
									
docker-compose.yml  (96 lines, Normal file)
@@ -0,0 +1,96 @@

version: '3.8'

services:
  #Postgres credentials are set via the environment variables below (root/root).
  #Container that holds the database and its entries.
  db:
    container_name: pg_container_test_vimes
    image: postgres
    restart: always
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    environment:
      POSTGRES_USER: root
      POSTGRES_PASSWORD: root
      POSTGRES_DB: test_db_vimes
    ports:
      - "5432:5432"


  #pgAdmin could display the database entries in a browser.
  #  pg_admin:
  #    container_name: pgadmin4_container_test_vimes
  #    image: dpage/pgadmin4
  #    restart: always
  #    environment:
  #      PGADMIN_DEFAULT_EMAIL: admin@admin.com            #for some reason these cannot be changed
  #      PGADMIN_DEFAULT_PASSWORD: root
  #    ports:
  #      - "5050:80"


  #MAIN controller for the whole program.
  py_app_main:                                      #container running the main pyApp
    container_name: pyapp_main_test_vimes
    build:                                          #build it with
      context: .                                    #either a path to a directory containing a Dockerfile, or a URL to a git repository
      dockerfile: ./pyDockerfile                    #path to the Dockerfile
                                                    #the ./ is apparently needed so that Vimes uses the same directory as the path
    command: python main.py --init                  #the command to run in the container
    logging:                                        #writes the terminal output to /var/lib/docker/containers/[container-id]/[container-id]-json.log
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    ports:
      - 3000:5432
    volumes:
      - personsNames:/var/lib/data                  #mounts the volume "personsNames" at "/var/lib/data"


  #Container that extracts all data from Dobby using Selenium.
  py_extract_dobby:
    container_name: py_extract_dobby_test_vimes
    build:
      context: .
      dockerfile: ./pyDockerfile                    #Dockerfile with a Python image that installs the required packages
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    command: python extractDobby.py
    ports:
      - 6666:4444
    volumes:
      - personsNames:/var/lib/data                  #saves all names at crossnative in a volume
      - "/data:/var/lib/data"                       #bind mount (hostpath:containerpath) so the data can also be kept on the host;
                                                    #for now it is mounted to /data on the host machine, but maybe somewhere else later
                                                    #note: this targets the same container path as the named volume above, so in practice only one of the two mounts can be used


  #Only contains the Selenium web browser so the other containers can use it.
  selenium_browser:
    container_name: sel_browser_test_vimes
    image: selenium/standalone-chrome:4.10.0-20230607
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: 10m
        max-file: "3"
    shm_size: '2gb'                                 #shared memory size; made much larger so the browser tab does not crash
    ports:
      - 4444:4444


volumes:
  personsNames:                                     #defines a named volume called "personsNames"
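The setup above relies on the application containers reaching the db service over the default compose network by its container name (main.py uses pg_container_test_vimes as db_host). A minimal connectivity check, assuming the same SQLAlchemy/Postgres driver setup that main.py uses; the file name check_db.py is hypothetical:

#check_db.py - hypothetical sanity check, run from a container on the same compose network
import sqlalchemy as db
from sqlalchemy.sql import text

engine = db.create_engine("postgresql://root:root@pg_container_test_vimes:5432/test_db_vimes")
with engine.connect() as connection:
    #SELECT 1 only succeeds if the db container is reachable and accepting connections
    print(connection.execute(text("SELECT 1")).scalar())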
							
								
								
									
extractDobby.py  (63 lines, Normal file)
@@ -0,0 +1,63 @@
#Selenium may need to be imported here as well, since this code talks to the selenium web browser container.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import numpy as np
import pandas as pd

#module that does the actual extraction from the Dobby database
import source.Dobby.Dobby as Dobby


def main():
    '''
    get all Dobby data
    '''
    #Dobby.testDriver()
    driver = Dobby.getDriver()
    ppiXGroupNames = Dobby.findGroupNames(driver)
    np.savetxt("/var/lib/data/test.txt", [1, 2], delimiter=",", fmt='%s')   #test write to the shared volume
    print(ppiXGroupNames)

    #newDriver = Dobby.getDriver()
    persons = Dobby.getPersons(driver, ppiXGroupNames)
    print(persons)

    mitarbeiterAnzahl = len(persons)
    print("Number of employees: " + str(mitarbeiterAnzahl))
    personsArray = np.array(list(persons))
    #print(personsArray)
    personsArrayTrans = personsArray.transpose()
    #print(personsArrayTrans)

    allNumbers = Dobby.getDobbyPersonData(driver, personsArray)
    print(allNumbers)
    allNumbersTransposed = allNumbers.transpose()

    #TODO: reuse a single driver everywhere and make sure it always gets closed
    driver.close()

    dictDobby = {'kuerzel': personsArrayTrans[0], 'name': personsArrayTrans[1], 'mobile': allNumbersTransposed[0]}
    print(dictDobby)
    dataframeDobby = pd.DataFrame.from_dict(dictDobby)

    dataframeDobby.to_csv('/var/lib/data/fullDobby.csv', index=False)
    np.savetxt("/var/lib/data/personsDobby.csv", personsArray, delimiter=",", fmt='%s')
    #TODO: also save the remaining data as CSV files


if __name__ == '__main__':
    main()
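source.Dobby.Dobby is imported above but is not included in this commit, so getDriver() is not shown here. Given the selenium_browser service in docker-compose.yml, it presumably opens a Remote WebDriver session against that container; a minimal sketch under that assumption (only the service name and port come from the compose file, everything else is illustrative):

#hypothetical sketch of what Dobby.getDriver() might do
from selenium import webdriver

def getDriver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless=new")          #run Chrome without a visible window
    #the standalone-chrome container listens on port 4444 inside the compose network
    return webdriver.Remote(
        command_executor="http://selenium_browser:4444/wd/hub",
        options=options,
    )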
							
								
								
									
main.py  (132 lines, Normal file)
@@ -0,0 +1,132 @@
import sqlalchemy as db                 #pyodbc does not work inside the container for some reason, so SQLAlchemy is used
import numpy as np
from sqlalchemy.sql import text
from sqlalchemy_utils import database_exists, create_database
import argparse
import time

import source.DataBank.createDatabank as createDB
import source.DataBank.fillDatabank as fillDB


def loadDobbyData():
    '''
    read the Dobby data from the shared volume
    '''
    personsArray = np.loadtxt("/var/lib/data/fullDobby.csv", delimiter=",", dtype='U60')    #important: load the values as strings
    print(personsArray)

    Mitarbeiter = np.zeros((len(personsArray), 7), dtype='U60')     #the array has to be created as a string array,
                                                                    #otherwise the values would be stored as floats
    for i in range(len(personsArray)):
        Mitarbeiter[i,0] = personsArray[i,0]                #Kuerzel

        nameParts = personsArray[i,1].split()               #split the full name
        nachName = nameParts[len(nameParts) - 1]            #the last word is the last name
        vorName = " ".join(nameParts[:-1])                  #everything before it is the first name

        Mitarbeiter[i,1] = vorName                          #Vorname
        Mitarbeiter[i,2] = nachName

        Mitarbeiter[i,3] = 'hh'
        Mitarbeiter[i,4] = '2022-01-01'
        Mitarbeiter[i,5] = 'mail@example.com'
        Mitarbeiter[i,6] = personsArray[i,2]
    print(Mitarbeiter)

    return Mitarbeiter


def main(db_user, db_pass, db_host, db_port, db_name):
    '''
    create argparse instance: --init if we want to create the tables
    '''
    parser = argparse.ArgumentParser()
    #stores True if the program is started like: python main.py --init
    parser.add_argument('-i', '--init', action='store_true', default=False, dest='boolean_init')
    #the value of --init is available as boolean_init
    init = parser.parse_args().boolean_init
    print(init)

    '''
    connect to the database
    '''
    #build the engine used to connect to and execute against the database
    db_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_user, db_pass, db_host, db_port, db_name)
    engine = db.create_engine(db_string, echo_pool=True)
    #create the database itself if it does not exist yet
    if not database_exists(engine.url):
        print('db did not exist before')
        create_database(engine.url)

    '''
    wait some time so that the other containers can scrape and save all their data;
    afterwards main.py can load that data and store it in the database
    '''
    #TODO: replace the fixed sleep time with a real dependency
    time.sleep(600)

    '''
    load the data scraped from Dobby
    '''
    Mitarbeiter = loadDobbyData()
    mitarbeiterAnzahl = len(Mitarbeiter)

    '''
    now all SQL commands
    '''
    with engine.connect() as connection:
        if init:
            createDB.createMitarbeiter(connection)
            print('create')

            for i in range(mitarbeiterAnzahl):
                try:
                    fillDB.inputMitarbeiter(connection, Mitarbeiter[i])
                except Exception:
                    print("{} was already filled in".format(Mitarbeiter[i][0]))
        #createDB.dropMitarbeiter(connection)


if __name__ == '__main__':
    '''
    set the values used to connect to the database
    '''
    #NOTE: db_host and db_name may have to be changed depending on your machine
    #TODO: move these into variables in docker compose
    db_user = 'root'
    db_pass = 'root'
    db_host = 'pg_container_test_vimes'   #ideally the postgres container name would be resolved automatically,
                                          #but that does not always work
    db_port = '5432'
    db_name = 'test_db_vimes'

    main(db_user, db_pass, db_host, db_port, db_name)
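source.DataBank.createDatabank and source.DataBank.fillDatabank are imported above but are not part of this commit. Based on how main.py calls them with the seven-column Mitarbeiter rows built in loadDobbyData(), a minimal sketch of what the two helpers might look like; the table name, column names and types are assumptions rather than the actual schema, and the sketch assumes SQLAlchemy 2.x-style connections:

#hypothetical sketch of createDatabank.createMitarbeiter and fillDatabank.inputMitarbeiter
from sqlalchemy.sql import text

def createMitarbeiter(connection):
    #column layout mirrors the seven fields filled in loadDobbyData(); the names are guesses
    connection.execute(text("""
        CREATE TABLE IF NOT EXISTS mitarbeiter (
            kuerzel   VARCHAR(60) PRIMARY KEY,
            vorname   VARCHAR(60),
            nachname  VARCHAR(60),
            standort  VARCHAR(60),
            eintritt  DATE,
            email     VARCHAR(60),
            mobile    VARCHAR(60)
        )
    """))
    connection.commit()

def inputMitarbeiter(connection, row):
    #row is one entry of the Mitarbeiter array; a duplicate primary key makes the insert fail,
    #which is what the try/except in main.py catches
    connection.execute(
        text("INSERT INTO mitarbeiter VALUES (:k, :v, :n, :s, :e, :m, :mob)"),
        {"k": row[0], "v": row[1], "n": row[2], "s": row[3],
         "e": row[4], "m": row[5], "mob": row[6]},
    )
    connection.commit()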