287 lines
10 KiB
Python

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import time
#some notes:
#the XPaths used to find elements can be looked up in the browser:
#right click on the page and choose Inspect ("Untersuchen (Q)" in a German browser)
#this shows all elements => find the needed table by hand
def getDriver():
    '''
    Define a driver (= mouse pointer) that uses Chrome as browser
    (Firefox did not work) and connects to the remote Selenium container.

    Waits 10 seconds before connecting, then opens the remote session.

    OUT:
    driver: Selenium driver object, which can connect to a website
    '''
    # the sleep is definitely needed: Selenium has to be established first
    time.sleep(10)

    chromeOptions = webdriver.ChromeOptions()
    for flag in (
        '--disable-blink-features=AutomationControlled',
        '--ignore-ssl-errors=yes',
        '--ignore-certificate-errors',
    ):
        chromeOptions.add_argument(flag)

    print("Now will connect")
    # sel_browser_test gives the URL of that container, so that we always use
    # the correct URL to connect; the _vimes suffix is needed since the name of
    # the container was changed in this directory
    hubUrl = 'http://sel_browser_test_vimes:4444/wd/hub'
    remoteDriver = webdriver.Remote(command_executor=hubUrl, options=chromeOptions)
    print("did connect successfully")
    return remoteDriver
def findGroupNames(driver):
    '''
    Find the id and description of each PPI.X group on Dobby.

    Opens the Dobby project overview page, walks through every row of the
    second table on that page (the first row is skipped) and keeps the groups
    whose id starts with 'prj_ppix'. Problems are reported with print and do
    not raise; in that case the groups collected so far are returned.

    IN:
    driver: Selenium driver object, which can connect to website
    OUT:
    groupsPpiX: list of [groupId, groupDesc] string pairs
    '''
    groupsPpiX = []
    #define the link to access and access it
    pathDobbyProjects = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?ou=Projekte"
    print(pathDobbyProjects)
    try:
        driver.get(pathDobbyProjects)
        print("on website now")
    except Exception:
        #Exception instead of a bare except, so that Ctrl+C / SystemExit still stop the run
        print("could not connect to Dobby website")
    #get data from Dobby (also attempted after a failed load; the lookup below then reports it)
    try:
        #get the table with all projects (the lower table)
        table = driver.find_element("xpath", "/html/body/div/div/table[2]/tbody")
        #go through each row in that table, except the first one
        for row in table.find_elements("xpath", ".//tr[not(position()=1)]"):
            try:
                groupId = row.find_element("xpath", ".//td[1]").text #first column in table is Group
                print(groupId)
                groupDesc = row.find_element("xpath", ".//td[2]").text #second is Description
            except NoSuchElementException:
                #a row without both cells is skipped, the other rows are still read
                print("NoSuchElementException")
                continue
            if groupId.startswith('prj_ppix'): #only choose groups starting like that = from PPI.X
                groupsPpiX.append([groupId, groupDesc])
        print("done going through all projects")
    except Exception:
        print("no such tables found, where we get group names")
    return groupsPpiX
def getPersons(driver, groupsNames):
    '''
    Find all persons working at PPI.X and collect their short notation and name.

    For every group its Dobby group page is opened and each row of the second
    table on that page (the first row is skipped, since it is the caption only)
    is read. Problems with a single group or row are reported with print and
    skipped; they do not raise.

    IN:
    driver: Selenium driver object, which can connect to website
    groupsNames: Array(n,2) of string, each entry is (group id, group description)
    OUT:
    persons: set of (short, name) string tuples; a person found in several groups is contained once
    '''
    persons = set()
    groupPath = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group="
    for group, _groupDesc in groupsNames: #the description is not needed here
        try:
            driver.get(groupPath + group)
        except Exception:
            #Exception instead of a bare except, so that Ctrl+C / SystemExit still stop the run
            print("could not connect to group webpage")
            continue
        try:
            table = driver.find_element("xpath", "/html/body/div/div/table[2]/tbody")
            #go through each row in table with short notation and name
            #do not consider first row (tr[not(position()=1)]) since caption only
            for row in table.find_elements("xpath", ".//tr[not(position()=1)]"):
                try:
                    short = row.find_element("xpath", ".//td[1]").text
                    print(short)
                    name = row.find_element("xpath", ".//td[2]").text
                except NoSuchElementException:
                    print("NoSuchElementException")
                    continue
                #use a tuple, because tuples are hashable and so the set can tell persons apart
                persons.add((short, name))
        except Exception:
            print("no such tables found, where we get person names")
    return persons
def getDobbyPersonData(driver, names):
    '''
    Scrape the data from the Dobby person pages, which is only the phone number.

    For every person the Dobby user page is opened and the text of the second
    cell in row 9 of the second table is stored as mobile number. The result
    has the same order as names. A person whose page has no such cell keeps an
    empty number instead of aborting the whole run.

    IN:
    driver: Selenium driver object, which can connect to website
    names: Array(n,2) of string, each entry is (short notation, full name)
    OUT:
    allNumbers: array(n, 2) of string (at most 60 characters per entry),
                column 0 is the mobile number, column 1 the short notation
    '''
    personPath = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?user="
    allNumbers = np.zeros((len(names), 2), dtype='U60')
    for s, person in enumerate(names):
        short = person[0] #person[0] has the short notation and person[1] the full name
        print(short)
        print(personPath + short)
        #store the short notation first, so the row is identifiable even without a number
        allNumbers[s, 1] = short
        driver.get(personPath + short)
        try:
            mobileNumber = driver.find_element("xpath", "/html/body/div/div/table[2]/tbody/tr[9]/td[2]").text
        except NoSuchElementException:
            #same handling as in the other scraping functions: report and go on
            print("NoSuchElementException")
            continue
        print(mobileNumber)
        allNumbers[s, 0] = mobileNumber
    print(allNumbers)
    return allNumbers
'''
From here on: only test helpers that were used while debugging
'''
def testDriver():
    '''
    Test the driver in Docker with a public webpage of YouTube and print some data.

    Connects to the remote Selenium container, opens https://about.youtube/,
    prints the elements with id "content" and then looks for an element with id
    'sub-frame-error' (presumably the marker of the crashing proxy page, going
    by the printed messages). The remote session is always closed at the end.
    '''
    #need to wait a little to assure that the selenium container is up and running, before connecting to it
    time.sleep(3)
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--ignore-ssl-errors=yes')
    options.add_argument('--ignore-certificate-errors')
    #NOTE: '--proxy-server=www-cache.ppi.int:3128' (the PPI proxy) was tried to get onto dobby.int, but does not work
    print("Now will connect")
    driver = webdriver.Remote(command_executor='http://sel_browser_test_vimes:4444/wd/hub', options=options) #sel_browser_test gives the URL of that container, so that we always use correct URL to connect
    #also need _vimes since we changed the name of the container in this dir
    print("did connect successfully")
    try:
        #just a test, to see if it works for a public website => it does
        pathTest = "https://about.youtube/"
        driver.get(pathTest)
        print(pathTest)
        print("someTesting only")
        #driver will wait for up to 10 seconds
        wait = WebDriverWait(driver, 10)
        try:
            elements = wait.until(EC.presence_of_all_elements_located(("xpath", '//*[@id="content"]')))
            print(elements)
            for e in elements:
                print(e.get_attribute("class"))
        except Exception:
            #Exception instead of a bare except, so that Ctrl+C / SystemExit still stop the run
            print("Youtube does not work")
        try:
            print("now the element of the wrong page")
            elements = wait.until(EC.presence_of_all_elements_located(("id", 'sub-frame-error')))
            print(elements)
        except Exception:
            print("at least not the crashing proxy webpage")
    finally:
        #always end the remote session, otherwise it stays open on the Selenium container
        driver.quit()
def testDobbyCrash():
    '''
    Test the connection to the Dobby webpage.

    This was used for example to check if the proxy can help to connect, or
    how the crashed webpage looks like. Opens the Dobby start page and the
    project overview page and prints what several element lookups return.
    The remote session is always closed at the end.
    '''
    time.sleep(5)
    #define a driver = mouse pointer using Chrome as browser (Firefox did not work)
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--ignore-ssl-errors=yes')
    options.add_argument('--ignore-certificate-errors')
    #NOTE: '--proxy-server=www-cache.ppi.int:3128' (the PPI proxy) could be set here to get onto dobby.int
    print("Now will connect")
    driver = webdriver.Remote(command_executor='http://sel_browser_test_vimes:4444/wd/hub', options=options) #sel_browser_test gives the URL of that container, so that we always use correct URL to connect
    #also need _vimes since we changed the name of the container in this dir
    print("did connect successfully")
    try:
        #define the link to access and access it
        pathDobbyProjects = "https://dobby.ppi.int"
        print(pathDobbyProjects)
        driver.get(pathDobbyProjects)
        time.sleep(5)
        print("full page")
        print(driver.find_elements("xpath", "/html/body/img"))
        pathDobbyProjects = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?ou=Projekte"
        try:
            driver.get(pathDobbyProjects)
            print("on website now")
        except Exception:
            #Exception instead of a bare except, so that Ctrl+C / SystemExit still stop the run
            print("could not connect to Dobby website")
        time.sleep(5)
        #find_elements returns an empty list instead of raising, so these lookups are safe to print
        notFound = driver.find_elements("xpath", "/html/body/div/div[1]/div[1]/h1")
        print("is website not found?")
        print(notFound)
        notFound = driver.find_elements("id", "root")
        print("maybe root?")
        print(notFound)
        #find the third row of the table with all projects (the lower table)
        table = driver.find_elements("xpath", "/html/body/div/div/table[2]/tbody/tr[3]")
        print(table)
        #driver will wait for up to 10 seconds
        wait = WebDriverWait(driver, 10)
        try:
            print("now the element of the wrong page")
            elements = wait.until(EC.presence_of_all_elements_located(("id", 'sub-frame-error')))
            print(elements)
        except Exception:
            print("at least not the crashing proxy webpage")
    finally:
        #always end the remote session, otherwise it stays open on the Selenium container
        driver.quit()