# EmployeeDB/source/Dobby/Dobby.py

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import time

#some notes:
    #the XPaths used to find elements can be looked up like this:
        #right-click in the browser and choose "Untersuchen (Q)" (German for "Inspect")
        #this shows all elements => find the needed table by hand


def getDriver(remoteUrl='http://sel_browser_test_vimes:4444/wd/hub', startupDelay=10):
    """
    Define a driver = mousepointer using Chrome as Browser (Firefox did not work).

    Waits startupDelay seconds, then opens a remote session on the Selenium
    container reachable under remoteUrl.

    IN:
        remoteUrl: string, URL of the Selenium container; the default
                   sel_browser_test_vimes is the name of that container, so that
                   we always use the correct URL to connect (the _vimes suffix is
                   needed since the container was renamed in this dir)
        startupDelay: seconds to sleep before connecting
    OUT:
        driver: driver Selenium Object, which can connect to website
    """
    #definitely need sleep since first have to establish Selenium
    time.sleep(startupDelay)

    options = webdriver.ChromeOptions()
    for argument in ('--disable-blink-features=AutomationControlled',
                     '--ignore-ssl-errors=yes',
                     '--ignore-certificate-errors'):
        options.add_argument(argument)

    print("Now will connect")
    driver = webdriver.Remote(command_executor=remoteUrl, options=options)
    print("did connect successfully")

    return driver


def findGroupNames(driver, groupPrefix='prj_ppix'):
    """
    Find the name and description of each group of PPI.X on Dobby.

    Opens the Dobby project overview, walks through the lower table and keeps
    every row whose group id starts with groupPrefix. Failures while loading
    the page or reading the table are only reported via print; the groups
    collected up to that point are still returned.

    IN:
        driver: driver Selenium Object, which can connect to website
        groupPrefix: string, only groups whose id starts like that are kept
                     (default 'prj_ppix' = from PPI.X)
    OUT:
        groupsPpiX: list(n) of [groupId, groupDescription] string pairs
    """
    groupsPpiX = []

    #define the link to access and access it
    pathDobbyProjects = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?ou=Projekte"
    print(pathDobbyProjects)
    try:
        driver.get(pathDobbyProjects)
        print("on website now")
    except Exception:
        #Exception instead of a bare except, so that Ctrl+C / SystemExit still stop the run
        print("could not connect to Dobby website")

    #get data from Dobby
    try:
        #get the table with all projects (the lower table)
        table = driver.find_element("xpath", "/html/body/div/div/table[2]/tbody")

        #go through each row in that table, but skip the first row (tr[not(position()=1)])
        for row in table.find_elements("xpath", ".//tr[not(position()=1)]"):
            try:
                groupId = row.find_element("xpath", ".//td[1]").text        #first column in table is Group
                print(groupId)
                groupDesc = row.find_element("xpath", ".//td[2]").text      #second is Description
            except NoSuchElementException:
                print("NoSuchElementException")
                continue

            if groupId.startswith(groupPrefix):
                groupsPpiX.append([groupId, groupDesc])
        print("done going through all projects")
    except Exception:
        #best effort: keep whatever was collected so far
        print("no such tables found, where we get group names")

    return groupsPpiX


def getPersons(driver, groupsNames):
    """
    Find all persons working at PPI.X and save names and short notation.

    Opens the Dobby page of every given group and reads short notation and
    name from each row of the lower table. A group whose page cannot be
    loaded or whose table cannot be read is reported via print and skipped.

    IN:
        driver: driver Selenium Object, which can connect to website
        groupsNames: Array(n,2) of string, [groupId, groupDescription] per group
    OUT:
        persons: set(m) of (short, name) string tuples
    """
    persons = set()
    groupPath = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?group="

    for group, _groupDesc in groupsNames:
        try:
            driver.get(groupPath + group)
        except Exception:
            #Exception instead of a bare except, so that Ctrl+C / SystemExit still stop the run
            print("could not connect to group webpage")
            continue

        try:
            table = driver.find_element("xpath", "/html/body/div/div/table[2]/tbody")

            #go though each row in table with short notation and name
            #do not consider first row (tr[not(position()=1)]) since caption only
            for row in table.find_elements("xpath", ".//tr[not(position()=1)]"):
                try:
                    short = row.find_element("xpath", ".//td[1]").text
                    print(short)
                    name = row.find_element("xpath", ".//td[2]").text
                except NoSuchElementException:
                    print("NoSuchElementException")
                    continue
                #use a tuple, because tuples are hashable and the set removes duplicates
                persons.add((short, name))
        except Exception:
            #best effort: go on with the next group
            print("no such tables found, where we get person names")

    return persons


def getDobbyPersonData(driver, names):
    """
    Scrape the data from the Dobby persons pages, which only is the mobile number.

    The result has the same order as names. If the expected table cell is
    missing on a person's page, the number of that person stays empty instead
    of aborting the whole run.

    IN:
        driver: driver Selenium object
        names: Array(n,2) of string, [short notation, full name] per person
    OUT:
        allNumbers: array(n, 2) of string (dtype 'U60'), [mobile number, short notation] per person
    """
    personPath = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?user="
    #cell that is read as mobile number
    #NOTE(review): tr[8] was formerly read as phone number - confirm the row layout on the page
    mobileXpath = "/html/body/div/div/table[2]/tbody/tr[9]/td[2]"
    allNumbers = np.zeros((len(names), 2), dtype='U60')

    for s, person in enumerate(names):
        short = person[0]                           #person[0] has kuerzel and person[1] the full name
        print(short)
        print(personPath + short)
        allNumbers[s, 1] = short
        driver.get(personPath + short)

        try:
            mobileNumber = driver.find_element("xpath", mobileXpath).text
        except NoSuchElementException:
            #same handling as in the other scraping functions: report and go on
            print("NoSuchElementException")
            continue

        print(mobileNumber)
        allNumbers[s, 0] = mobileNumber

    #print the result once at the end instead of the whole array after every person
    print(allNumbers)

    return allNumbers


'''
Everything below is only test code that was used while debugging
'''


def testDriver():
    """
    Test the Driver in Docker with a public webpage on YOUTUBE and print some data.

    Connects to the Selenium container, opens https://about.youtube/, waits up
    to 10 seconds for the elements with id "content" and prints their class
    attribute. Afterwards it looks for the element with id 'sub-frame-error',
    which the messages below call the element of the wrong (crashing proxy)
    page. The proxy we would need for Dobby is currently NOT set (see the
    disabled option). The browser session is always closed at the end.
    """
    #need to wait a little to assure that the selenium container is up and running, before connecting to it
    time.sleep(3)

    options = webdriver.ChromeOptions()
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--ignore-ssl-errors=yes')
    options.add_argument('--ignore-certificate-errors')
    #set PPI Proxy to get onto dobby.int, but does not work:
    #options.add_argument('--proxy-server=www-cache.ppi.int:3128')

    print("Now will connect")
    #sel_browser_test gives the URL of that container, so that we always use correct URL to connect
    #also need _vimes since we changed the name of the container in this dir
    driver = webdriver.Remote(command_executor='http://sel_browser_test_vimes:4444/wd/hub', options=options)
    print("did connect successfully")

    try:
        #just a test, to see if it works for a public website => it does
        pathTest = "https://about.youtube/"
        driver.get(pathTest)
        print(pathTest)
        print("someTesting only")

        #driver will wait for up to 10 seconds
        wait = WebDriverWait(driver, 10)

        try:
            elements = wait.until(EC.presence_of_all_elements_located(("xpath", '//*[@id="content"]')))
            print(elements)
            for e in elements:
                print(e.get_attribute("class"))
        except Exception:
            #Exception instead of a bare except, so that Ctrl+C still stops the test
            print("Youtube does not work")

        try:
            print("now the element of the wrong page")
            elements = wait.until(EC.presence_of_all_elements_located(("id", 'sub-frame-error')))
            print(elements)
        except Exception:
            print("at least not the crashing proxy webpage")
    finally:
        #always end the remote session, otherwise it stays open on the Selenium container
        driver.quit()


def testDobbyCrash():
    """
    This was used to test the connection to the Dobby webpage,
    for example if the Proxy can help to connect
    or how the crashed webpage looks like.

    Connects to the Selenium container, opens the Dobby start page and the
    project overview and prints the elements found for several locators. The
    proxy is currently NOT set (see the disabled option). The browser session
    is always closed at the end.
    """
    time.sleep(5)

    #define a driver = mousepointer using Chrome as Browser (Firefox did not work)
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--ignore-ssl-errors=yes')
    options.add_argument('--ignore-certificate-errors')
    #set PPI Proxy to get onto dobby.int:
    #options.add_argument('--proxy-server=www-cache.ppi.int:3128')

    print("Now will connect")
    #sel_browser_test gives the URL of that container, so that we always use correct URL to connect
    #also need _vimes since we changed the name of the container in this dir
    driver = webdriver.Remote(command_executor='http://sel_browser_test_vimes:4444/wd/hub', options=options)
    print("did connect successfully")

    try:
        #define the link to access and access it
        pathDobbyStart = "https://dobby.ppi.int"
        print(pathDobbyStart)
        driver.get(pathDobbyStart)
        time.sleep(5)
        print("full page")
        print(driver.find_elements("xpath", "/html/body/img"))

        pathDobbyProjects = "https://dobby.ppi.int/cgi-bin/ad/adgroups.pl?ou=Projekte"
        try:
            driver.get(pathDobbyProjects)
            print("on website now")
        except Exception:
            #Exception instead of a bare except, so that Ctrl+C still stops the test
            print("could not connect to Dobby website")

        time.sleep(5)
        notFoundHeading = driver.find_elements("xpath", "/html/body/div/div[1]/div[1]/h1")
        print("is website not found?")
        print(notFoundHeading)
        rootElements = driver.find_elements("id", "root")
        print("maybe root?")
        print(rootElements)

        #find the third row of the table with all projects (the lower table)
        tableRows = driver.find_elements("xpath", "/html/body/div/div/table[2]/tbody/tr[3]")
        print(tableRows)

        #driver will wait for up to 10 seconds
        wait = WebDriverWait(driver, 10)
        try:
            print("now the element of the wrong page")
            elements = wait.until(EC.presence_of_all_elements_located(("id", 'sub-frame-error')))
            print(elements)
        except Exception:
            print("at least not the crashing proxy webpage")
    finally:
        #always end the remote session, otherwise it stays open on the Selenium container
        driver.quit()