# EmployeeDB/source/Confluence/Confluence.py

from selenium import webdriver
import json
import getpass

# Gets the basic information about a person that is synchronized with persis.
# Returns the result as a dict and prints it as JSON.
def get_persis_info(driver, name):
    """Read the basic profile fields from the "general-de" section.

    Args:
        driver: Browser handle offering ``get`` and ``find_element`` (the
            script entry point passes a Selenium Chrome driver).
        name: User name appended to the profile URL.

    Returns:
        dict mapping each field label to the text of the matching element.
        The same data is printed to stdout as indented JSON.

    Any error raised by a lookup propagates to the caller; nothing is
    caught here.
    """
    profile_url = "https://confluence.ppi.de/masterprofiles/viewprofile.action?username=" + name
    driver.get(profile_url)

    general_pane = driver.find_element("id", "general-de")
    details = general_pane.find_element("tag name", "tbody")

    # (result key, element to search from, relative XPath), in lookup order.
    lookups = (
        ("job_description", general_pane, "./p/span"),
        ("role", general_pane, "./p/i"),
        ("email", details, "./tr[1]/td[2]/a/h4"),
        ("phone number", details, "./tr[2]/td[2]/a/h4"),
        ("location", details, "./tr[4]/td[2]/h4"),
        ("business area", details, "./tr[5]/td[2]/h4"),
        ("unit", details, "./tr[6]/td[2]/h4"),
        ("team", details, "./tr[7]/td[2]/h4"),
        ("supervisor", details, "./tr[8]/td[2]/a/h4"),
    )

    persis_info = {}
    for key, scope, xpath in lookups:
        persis_info[key] = scope.find_element("xpath", xpath).text

    print(json.dumps(persis_info, indent=4))
    return persis_info

# Gets the additional information about a person that is added manually to each profile.
# Returns the result as a dict and prints it as JSON.
def get_additional_info(driver, name):
    """Read the manually maintained profile fields from the "general-de" table.

    Each field is looked up independently. A field whose element cannot be
    read is reported on stdout and stored as ``None``, so the returned dict
    always contains all three keys. (Previously a failed lookup left the
    local variable unbound and building the result raised
    ``UnboundLocalError``.)

    Args:
        driver: Browser handle offering ``get`` and ``find_element`` (the
            script entry point passes a Selenium Chrome driver).
        name: User name appended to the profile URL.

    Returns:
        dict with the keys ``"language"``, ``"qualification"`` and
        ``"duration"``; each value is the element text or ``None``. The
        same data is printed to stdout as indented JSON.

    Errors from loading the page or locating the table itself propagate.
    """
    driver.get("https://confluence.ppi.de/masterprofiles/viewprofile.action?username=" + name)

    general = driver.find_element("id", "general-de")
    table = general.find_element("tag name", "tbody")

    # (result key, XPath relative to the table body, message printed on failure)
    fields = (
        ("language", "./tr[10]/td[2]/h4", "Languages could not be extracted"),
        ("qualification", "./tr[11]/td[2]/h4", "Qualifications could not be extracted"),
        ("duration", "./tr[12]/td[2]/h4", "Duration could not be extracted"),
    )

    additional_info = {}
    for key, xpath, failure_message in fields:
        try:
            additional_info[key] = table.find_element("xpath", xpath).text
        except Exception:
            # Best-effort scrape: a missing row must not abort the other
            # fields. ``Exception`` (not a bare ``except``) keeps Ctrl-C and
            # interpreter shutdown working.
            additional_info[key] = None
            print(failure_message)

    print(json.dumps(additional_info, indent=4))
    return additional_info

# Gets information on the person's main focus areas and their main skills.
# Returns the result as a dict; successfully extracted data is also printed as JSON.
def _parse_width_percent(style):
    """Return the integer percentage of the ``width`` declaration in *style*.

    The text between ``width:`` and the following ``%`` is converted with
    ``int``. Raises ``ValueError`` when that text is not an integer (this
    includes a missing ``width:`` or a width not given in percent).
    """
    _, _, after_width = style.partition("width:")
    value, _, _ = after_width.partition("%")
    return int(value.strip())


def get_main_focus(driver, name):
    """Read the experience lists and the main skills of one profile.

    Every section is extracted independently; a section that cannot be read
    is reported on stdout and left empty in the result.

    Args:
        driver: Browser handle offering ``get`` and ``find_element`` (the
            script entry point passes a Selenium Chrome driver).
        name: User name appended to the profile URL.

    Returns:
        dict with the keys ``"consulting experience"``,
        ``"project management experience"``, ``"development experience"``
        (each a list holding at most one list of entry texts) and
        ``"skills"`` (a list of ``{skill title: score}`` dicts). The result
        is printed as indented JSON when the skill container was found.

    Errors from loading the page or locating "pane-emphasis-de" propagate.
    """
    driver.get("https://confluence.ppi.de/masterprofiles/viewprofile.action?username=" + name)

    experience = {
        "consulting experience": [],
        "project management experience": [],
        "development experience": [],
        "skills": [],
    }

    emphasis_container = driver.find_element("id", "pane-emphasis-de")

    # (result key, XPath of the section, message printed on failure).
    # The messages are runtime output and are kept verbatim.
    sections = (
        ("consulting experience", "./div/div[1]",
         "Consulting Experience could not be extracted"),
        ("project management experience", "./div/div[2]",
         "Project Management Experience could not be extracted"),
        ("development experience", "./div/div[3]",
         "Development Experience could not be extratced"),
    )
    for key, xpath, failure_message in sections:
        try:
            section = emphasis_container.find_element("xpath", xpath)
            # The entry list is appended as ONE element (nested list); this
            # shape is kept so existing consumers see the same structure.
            experience[key].append(get_content_as_list(section, "./div/div[2]", "li"))
        except Exception:
            # Best-effort scrape; ``Exception`` instead of a bare ``except``
            # so Ctrl-C and interpreter shutdown still propagate.
            print(failure_message)

    try:
        skill_container = driver.find_element("id", "mainSkills-de")
    except Exception:
        # Previously swallowed without any output.
        print("Main skills could not be extracted")
        return experience

    for position in (1, 2, 3):
        try:
            heading = skill_container.find_element("xpath", "./h3[{}]".format(position))
            # The bar of heading N sits in the (N + 1)-th div of the container.
            bar = skill_container.find_element("xpath", "./div[{}]/div".format(position + 1))
            # Parse the percentage instead of slicing fixed columns: the
            # former ``style[7:9]`` read "width: 100%;" as 10 and could not
            # read a one-digit value at all.
            score = _parse_width_percent(bar.get_attribute("style"))
            experience["skills"].append({heading.text: score})
        except Exception:
            print("Skill{} could not be extracted".format(position))

    print(json.dumps(experience, indent=4))
    return experience

# Gets information on the person's technical, field and other skills.
# Returns the result as a dict; successfully extracted data is also printed as JSON.
def _read_skill_group(group, labels):
    """Map each label to the ``li`` texts of its sub-section inside *group*.

    The n-th label (1-based) is read from ``./div[n]/div[2]`` below *group*.
    """
    return {
        label: get_content_as_list(group, "./div[{}]/div[2]".format(position), "li")
        for position, label in enumerate(labels, start=1)
    }


def get_skills(driver, name):
    """Read the technical, field and other skills of one profile.

    The three sections are extracted independently; a section that cannot be
    read completely is reported on stdout and left empty in the result.
    After every successfully read section the result so far is printed as
    indented JSON.

    Args:
        driver: Browser handle offering ``get`` and ``find_element`` (the
            script entry point passes a Selenium Chrome driver).
        name: User name appended to the profile URL.

    Returns:
        dict with the keys ``"technical"`` and ``"field"`` (each a list
        holding at most one dict of label -> list of texts) and ``"other"``
        (a list holding at most one list of texts).

    Errors from loading the page or locating "pane-skills-de" propagate.
    """
    driver.get("https://confluence.ppi.de/masterprofiles/viewprofile.action?username=" + name)

    skills = {"technical": [],
              "field": [],
              "other": []}

    skills_container = driver.find_element("id", "pane-skills-de")

    # Labels in page order; the position in the tuple selects the sub-section.
    technical_labels = (
        "software development",
        "programming languages",
        "runtime environments",
        "operating systems",
        "database management systems",
        "communication and networks",
    )
    field_labels = (
        "industry and special skills",
        "consulting",
    )

    try:
        technical = skills_container.find_element("xpath", "./div/div[1]")
        skills["technical"].append(_read_skill_group(technical, technical_labels))
        print(json.dumps(skills, indent=4))
    except Exception:
        # Best-effort scrape; ``Exception`` instead of a bare ``except`` so
        # Ctrl-C and interpreter shutdown still propagate.
        print("Technical Skills could not be extracted")

    try:
        field = skills_container.find_element("xpath", "./div/div[2]")
        skills["field"].append(_read_skill_group(field, field_labels))
        print(json.dumps(skills, indent=4))
    except Exception:
        print("Field Skills could not be extracted")

    try:
        other_skills = get_content_as_list(skills_container, "./div/div[3]", "li")
        skills["other"].append(other_skills)
        print(json.dumps(skills, indent=4))
    except Exception:
        print("Other Skills could not be extracted")

    return skills

# Gets information on the certificates and publications of a person.
# Returns the result as a dict; successfully extracted data is also printed as JSON.
def get_life_events(driver, name):
    """Read the certificates and publications of one profile.

    Both kinds of entries are extracted independently. If reading one kind
    fails part-way, a message is printed and the entries collected up to
    that point are kept. After each kind that was read completely, the
    result so far is printed as indented JSON.

    Args:
        driver: Browser handle offering ``get`` and ``find_element`` (the
            script entry point passes a Selenium Chrome driver).
        name: User name appended to the profile URL.

    Returns:
        dict with the keys ``"certificates"`` (list of dicts with ``name``,
        ``date``, ``issuer``) and ``"publications"`` (list of dicts with
        ``title``, ``date``, ``publisher``).

    Errors from loading the page or locating "pane-lifeEvents-de" propagate.
    """
    driver.get("https://confluence.ppi.de/masterprofiles/viewprofile.action?username=" + name)

    life_event_record = {
        "certificates": [],
        "publications": []
    }
    life_events = driver.find_element("id", "pane-lifeEvents-de")

    # (result key, CSS class of one entry, key for the title part,
    #  key for the second line, message printed on failure)
    sections = (
        ("certificates", "certificate", "name", "issuer",
         "Certificates could not be extracted"),
        ("publications", "publication", "title", "publisher",
         "Publications could not be extracted"),
    )

    for record_key, css_class, title_key, origin_key, failure_message in sections:
        try:
            for entry in life_events.find_elements("class name", css_class):
                title_and_date = entry.find_element("xpath", "./div[1]").text
                origin = entry.find_element("xpath", "./div[2]").text

                # The last 7 characters are taken as the date and one more
                # character before them is dropped as separator.
                # NOTE(review): presumably the text looks like
                # "Title MM/YYYY" - confirm against the page.
                life_event_record[record_key].append({
                    title_key: title_and_date[:-8],
                    "date": title_and_date[-7:],
                    origin_key: origin,
                })

            print(json.dumps(life_event_record, indent=4))
        except Exception:
            # Best-effort scrape; ``Exception`` instead of a bare ``except``
            # so Ctrl-C and interpreter shutdown still propagate.
            print(failure_message)

    return life_event_record

# Gets information on all projects of a person and the tasks related to each project.
# Returns the result as a dict; successfully extracted data is also printed as JSON.
def get_projects(driver, name):
    """Read all projects, including their task lists, of one profile.

    Every ``table`` element inside "pane-projects-de" is treated as one
    project. If reading any project fails, a message is printed, the
    remaining projects are skipped and the projects collected up to that
    point are returned.

    Args:
        driver: Browser handle offering ``get`` and ``find_element`` (the
            script entry point passes a Selenium Chrome driver).
        name: User name appended to the profile URL.

    Returns:
        dict with the single key ``"project"``: a list of dicts with the
        keys ``period``, ``title``, ``role``, ``client``, ``department``,
        ``description`` (element texts) and ``tasks`` (list of texts). The
        result is printed as indented JSON when all projects were read.

    Errors from loading the page or locating the project pane propagate.
    """
    driver.get("https://confluence.ppi.de/masterprofiles/viewprofile.action?username=" + name)

    project_table = driver.find_element("id", "pane-projects-de")
    projects = project_table.find_elements("tag name", "table")

    all_projects = {"project": []}

    # (result key, CSS class of the element holding the text), in lookup order.
    text_fields = (
        ("period", "projectPeriod"),
        ("title", "projectTitle"),
        ("role", "projectRole"),
        ("client", "projectClient"),
        ("department", "projectDepartment"),
        ("description", "projectDescription"),
    )

    try:
        for project in projects:
            proj = {
                key: project.find_element("class name", css_class).text
                for key, css_class in text_fields
            }
            proj["tasks"] = get_content_as_list(project, "./tbody/tr/td[2]", "li")

            all_projects["project"].append(proj)

        print(json.dumps(all_projects, indent=4))

    except Exception:
        # Best-effort scrape; ``Exception`` instead of a bare ``except`` so
        # Ctrl-C and interpreter shutdown still propagate.
        print("Projects could not be extracted")

    return all_projects


# Extracts the text of every element with the given tag below a relative XPath and returns the texts as a list.
def get_content_as_list(driver, relative_content_path, type):
    """Collect the texts of all *type* elements below one container.

    Args:
        driver: Object to search from; anything offering ``find_element``
            (the callers in this file pass page elements, not the browser).
        relative_content_path: XPath of the container, resolved from *driver*.
        type: Tag name of the elements whose text is collected.

    Returns:
        list with the ``text`` of every matching element, in the order the
        container reports them; empty when nothing matches.
    """
    container = driver.find_element("xpath", relative_content_path)
    return [entry.text for entry in container.find_elements("tag name", type)]


if __name__ == "__main__":
    with webdriver.Chrome() as browser:
        # Manual login: open Confluence, then wait on the console until the
        # operator confirms that the browser session is logged in.
        browser.get("https://confluence.ppi.de")
        getpass.getpass("Press Enter after You are done logging in")

        # User name of the profile to read.
        profile_user = 'lawi'

        # Only the skills extractor runs here. The other extractors take the
        # same (driver, user name) arguments and can be called the same way:
        # get_persis_info, get_additional_info, get_main_focus,
        # get_life_events, get_projects.
        get_skills(browser, profile_user)