Upload files to "source/Dobby"
This commit is contained in:
286
source/Dobby/Dobby.py
Normal file
286
source/Dobby/Dobby.py
Normal file
@@ -0,0 +1,286 @@
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
#some notes:
|
||||
#the links to find elements can be found with:
|
||||
#right click on browser and choose Untersuchen(Q)
|
||||
#shows all elements => find the needed table by hand
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
define a driver = mousepointer using Chrome as Browser (Firefox did not work)
|
||||
OUT:
|
||||
driver: driver Selenium Object, which can connect to website
|
||||
'''
|
||||
def getDriver(commandExecutor='http://sel_browser_test_vimes:4444/wd/hub', startupDelay=10):
    """Connect to the remote Selenium Chrome browser and return the driver.

    IN:
        commandExecutor: URL of the Selenium hub to connect to. Per the
            original note, the default host name is the name of the
            Selenium container (renamed with the _vimes suffix in this dir),
            so that the correct URL is always used.
        startupDelay: seconds to sleep before connecting
    OUT:
        driver: driver Selenium object, which can connect to a website
    """
    # definitely need the sleep, since Selenium has to be established first
    time.sleep(startupDelay)

    options = webdriver.ChromeOptions()
    for argument in (
        '--disable-blink-features=AutomationControlled',
        '--ignore-ssl-errors=yes',
        '--ignore-certificate-errors',
    ):
        options.add_argument(argument)

    print("Now will connect")
    driver = webdriver.Remote(command_executor=commandExecutor, options=options)
    print("did connect successfully")

    return driver
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
find names of each Group of PPI.X on Dobby
|
||||
IN:
|
||||
driver: driver Selenium Object, which can connect to website
|
||||
OUT:
|
||||
groupsPpiX: Array(n,2) of string (group id, description)
|
||||
'''
|
||||
def findGroupNames(driver):
    """Collect the PPI.X project groups listed on the Dobby projects page.

    IN:
        driver: driver Selenium object, which can connect to a website
    OUT:
        groupsPpiX: list of [groupId, groupDesc] string pairs, one for every
            table row whose group id starts with 'prj_ppix'. Empty when the
            page could not be loaded or the table was not found; may be
            partial when reading the table fails half way.
    """
    groupsPpiX = []

    # define the link to access and access it
    pathDobbyProjects = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?ou=Projekte"
    print(pathDobbyProjects)
    try:
        driver.get(pathDobbyProjects)
    except Exception:
        print("could not connect to Dobby website")
        # do not scrape whatever page the driver happens to show right now
        return groupsPpiX
    print("on website now")

    # get data from Dobby
    try:
        # get the table with all projects (the lower table)
        table = driver.find_element("xpath", "/html/body/div/div/table[2]/tbody")

        # go through each row in that table, skipping the first (caption) row
        for row in table.find_elements("xpath", ".//tr[not(position()=1)]"):
            try:
                groupId = row.find_element("xpath", ".//td[1]").text  # first column is Group
                print(groupId)
                groupDesc = row.find_element("xpath", ".//td[2]").text  # second is Description
            except NoSuchElementException:
                print("NoSuchElementException")
                continue
            # only choose groups starting like that = from PPI.X
            if groupId.startswith('prj_ppix'):
                groupsPpiX.append([groupId, groupDesc])
        print("done going through all projects")
    except Exception:
        # Exception instead of a bare except, so Ctrl-C / SystemExit still work
        print("no such tables found, where we get group names")

    return groupsPpiX
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
find all persons working at PPI.X
|
||||
and save names and short notation
|
||||
IN:
|
||||
groupsNames: Array(n,2) of string
|
||||
OUT:
|
||||
persons: set(m) of (short notation, name) string tuples
|
||||
'''
|
||||
def getPersons(driver, groupsNames):
    """Collect all persons listed on the Dobby pages of the given groups.

    IN:
        driver: driver Selenium object, which can connect to a website
        groupsNames: iterable of (group id, description) string pairs; only
            the group id is used, to build the group page URL
    OUT:
        persons: set of (short, name) string tuples; a person appearing in
            several groups is contained once. Groups whose page or table
            cannot be read are skipped.
    """
    persons = set()
    groupPath = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group="

    for group, _groupDesc in groupsNames:
        try:
            driver.get(groupPath + group)
        except Exception:
            print("could not connect to group webpage")
            continue

        try:
            table = driver.find_element("xpath", "/html/body/div/div/table[2]/tbody")

            # go through each row in the table with short notation and name;
            # do not consider the first row (tr[not(position()=1)]) since caption only
            for row in table.find_elements("xpath", ".//tr[not(position()=1)]"):
                try:
                    short = row.find_element("xpath", ".//td[1]").text
                    print(short)
                    name = row.find_element("xpath", ".//td[2]").text
                except NoSuchElementException:
                    print("NoSuchElementException")
                    continue
                # use a tuple, because tuples are hashable and so the set
                # can tell persons apart
                persons.add((short, name))
        except Exception:
            # Exception instead of a bare except, so Ctrl-C / SystemExit still work
            print("no such tables found, where we get person names")

    return persons
|
||||
|
||||
|
||||
|
||||
'''
|
||||
scrapes all data from the Dobby persons page
|
||||
which only are their phone numbers
|
||||
it is the same order as the names
|
||||
IN:
|
||||
driver: driver Selenium object
|
||||
names: Array(n,2) of string
|
||||
OUT:
|
||||
allNumbers: array(n, 2) of string
|
||||
'''
|
||||
def getDobbyPersonData(driver, names):
    """Scrape the mobile phone number of every person from their Dobby page.

    IN:
        driver: driver Selenium object
        names: sized collection of entries whose element [0] is the short
            notation (kuerzel) and element [1] the full name; only [0] is used
    OUT:
        allNumbers: array(n, 2) of string (dtype 'U60'), rows in the
            iteration order of names. Column 0 is the mobile number, or an
            empty string when the person page has no such cell. Column 1 is
            the short notation.

    Errors raised by driver.get are not caught and propagate to the caller.
    """
    personPath = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?user="
    allNumbers = np.zeros((len(names), 2), dtype='U60')

    for s, short in enumerate(names):
        kuerzel = short[0]
        print(kuerzel)
        print(personPath + kuerzel)
        driver.get(personPath + kuerzel)
        allNumbers[s, 1] = kuerzel

        try:
            # row 9, column 2 of the lower table is read as the mobile number
            mobileNumber = driver.find_element(
                "xpath", "/html/body/div/div/table[2]/tbody/tr[9]/td[2]"
            ).text
        except NoSuchElementException:
            # one page without that cell must not abort the whole run;
            # the number of this person simply stays empty
            print("NoSuchElementException")
            continue
        print(mobileNumber)
        allNumbers[s, 0] = mobileNumber

    print(allNumbers)

    return allNumbers
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
From here on only some tests when debugging
|
||||
'''
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
test the Driver in Docker with a public webpage on YOUTUBE and print some data
|
||||
also use the Proxy we need for Dobby
|
||||
'''
|
||||
def testDriver():
    """Debug helper: check the Selenium driver in Docker against a public page.

    Connects to the remote Selenium browser, opens https://about.youtube/
    and prints the class attribute of the elements with id "content". Then
    it waits for an element with id 'sub-frame-error' and prints the result
    of that wait. The remote session is always closed at the end.
    """
    # need to wait a little to assure that the selenium container is up and
    # running, before connecting to it
    time.sleep(3)

    options = webdriver.ChromeOptions()
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--ignore-ssl-errors=yes')
    options.add_argument('--ignore-certificate-errors')
    # NOTE: setting the PPI proxy to get onto dobby.int was tried with
    # '--proxy-server=www-cache.ppi.int:3128', but does not work

    print("Now will connect")
    # the host name is the name of the Selenium container (with the _vimes
    # suffix, since the container was renamed in this dir)
    driver = webdriver.Remote(command_executor='http://sel_browser_test_vimes:4444/wd/hub', options=options)
    print("did connect successfully")

    try:
        # just a test, to see if it works for a public website => it does
        pathTest = "https://about.youtube/"
        driver.get(pathTest)
        print(pathTest)
        print("someTesting only")

        # driver will wait for up to 10 seconds
        wait = WebDriverWait(driver, 10)

        try:
            elements = wait.until(EC.presence_of_all_elements_located(("xpath", '//*[@id="content"]')))
            print(elements)
            for e in elements:
                print(e.get_attribute("class"))
        except Exception:
            print("Youtube does not work")

        try:
            print("now the element of the wrong page")
            elements = wait.until(EC.presence_of_all_elements_located(("id", 'sub-frame-error')))
            print(elements)
        except Exception:
            print("at least not the crashing proxy webpage")
    finally:
        # always release the remote browser session, otherwise it stays open
        driver.quit()
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
This was used to test the connection to the Dobby webpage
|
||||
for example if the Proxy can help to connect
|
||||
or how the crashed webpage looks like
|
||||
'''
|
||||
def testDobbyCrash():
    """Debug helper: probe the connection to the Dobby webpage.

    Opens the Dobby start page and the projects page and prints the result
    of several element lookups, to see what the loaded (or crashed) page
    looks like. The remote session is always closed at the end.
    """
    time.sleep(5)

    # define a driver = mousepointer using Chrome as Browser (Firefox did not work)
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--ignore-ssl-errors=yes')
    options.add_argument('--ignore-certificate-errors')
    # NOTE: the PPI proxy to get onto dobby.int would be set with
    # '--proxy-server=www-cache.ppi.int:3128' (currently disabled)

    print("Now will connect")
    # the host name is the name of the Selenium container (with the _vimes
    # suffix, since the container was renamed in this dir)
    driver = webdriver.Remote(command_executor='http://sel_browser_test_vimes:4444/wd/hub', options=options)
    print("did connect successfully")

    try:
        # define the link to access and access it
        pathDobbyProjects = "https://dobby.ppi.int"
        print(pathDobbyProjects)
        driver.get(pathDobbyProjects)
        time.sleep(5)
        print("full page")
        print(driver.find_elements("xpath", "/html/body/img"))

        pathDobbyProjects = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?ou=Projekte"
        try:
            driver.get(pathDobbyProjects)
            print("on website now")
        except Exception:
            print("could not connect to Dobby website")

        time.sleep(5)
        notFound = driver.find_elements("xpath", "/html/body/div/div[1]/div[1]/h1")
        print("is website not found?")
        print(notFound)
        notFound = driver.find_elements("id", "root")
        print("maybe root?")
        print(notFound)

        # find the table with all projects (the lower table)
        table = driver.find_elements("xpath", "/html/body/div/div/table[2]/tbody/tr[3]")
        print(table)

        # driver will wait for up to 10 seconds
        wait = WebDriverWait(driver, 10)
        try:
            print("now the element of the wrong page")
            elements = wait.until(EC.presence_of_all_elements_located(("id", 'sub-frame-error')))
            print(elements)
        except Exception:
            print("at least not the crashing proxy webpage")
    finally:
        # always release the remote browser session, otherwise it stays open
        driver.quit()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user