Upload files to "source/Confluence"
This commit is contained in:
parent
68f7bd31ad
commit
fe28536b7b
355
source/Confluence/Confluence.py
Normal file
355
source/Confluence/Confluence.py
Normal file
@ -0,0 +1,355 @@
|
||||
from selenium import webdriver
|
||||
import json
|
||||
import getpass
|
||||
|
||||
#gets all basic information on the person which is synchronized with persis
|
||||
#saves them in a json format
|
||||
def get_persis_info(driver, name):
    """Read the basic master data shown on a person's profile page.

    Opens the master-profile page of ``name``, reads the job description, the
    role and seven rows of the ``general-de`` table, prints the record as
    indented JSON and returns it.

    Args:
        driver: Object offering ``get(url)`` and ``find_element(by, value)``;
            the script at the bottom of this file passes a Selenium Chrome
            driver.
        name: Username that is appended to the profile URL.

    Returns:
        dict mapping each field name to the text of the matching element.

    Raises:
        Whatever ``find_element`` raises when an element is missing; nothing
        is caught here.
    """
    driver.get(
        "https://confluence.ppi.de/masterprofiles/viewprofile.action?username="
        + name
    )

    general = driver.find_element("id", "general-de")
    table = general.find_element("tag name", "tbody")

    # Field name -> (element to search from, XPath relative to that element).
    # The dict order is both the lookup order and the key order of the result.
    field_locations = {
        "job_description": (general, "./p/span"),
        "role": (general, "./p/i"),
        "email": (table, "./tr[1]/td[2]/a/h4"),
        "phone number": (table, "./tr[2]/td[2]/a/h4"),
        "location": (table, "./tr[4]/td[2]/h4"),
        "business area": (table, "./tr[5]/td[2]/h4"),
        "unit": (table, "./tr[6]/td[2]/h4"),
        "team": (table, "./tr[7]/td[2]/h4"),
        "supervisor": (table, "./tr[8]/td[2]/a/h4"),
    }

    persis_info = {
        field: root.find_element("xpath", xpath).text
        for field, (root, xpath) in field_locations.items()
    }

    print(json.dumps(persis_info, indent=4))
    return persis_info
|
||||
|
||||
#gets additional information on the person which is added manually in each profile
|
||||
#saves them in a json format
|
||||
def get_additional_info(driver, name):
    """Read the manually maintained rows of a person's profile table.

    Opens the master-profile page of ``name`` and reads rows 10, 11 and 12 of
    the ``general-de`` table (language, qualification, duration).  The record
    is printed as indented JSON and returned.

    A row that cannot be read is reported on stdout and stored as ``None``.
    Previously the corresponding variable stayed unbound, so building the
    result raised ``UnboundLocalError`` as soon as one row was missing.

    Args:
        driver: Object offering ``get(url)`` and ``find_element(by, value)``;
            the script at the bottom of this file passes a Selenium Chrome
            driver.
        name: Username that is appended to the profile URL.

    Returns:
        dict with the keys ``language``, ``qualification`` and ``duration``;
        each value is the element text or ``None``.

    Raises:
        Whatever ``find_element`` raises when the ``general-de`` container or
        its ``tbody`` is missing.
    """
    driver.get(
        "https://confluence.ppi.de/masterprofiles/viewprofile.action?username="
        + name
    )

    general = driver.find_element("id", "general-de")
    table = general.find_element("tag name", "tbody")

    # (result key, table row, message printed when the row cannot be read)
    rows = (
        ("language", 10, "Languages could not be extracted"),
        ("qualification", 11, "Qualifications could not be extracted"),
        ("duration", 12, "Duration could not be extracted"),
    )

    additional_info = {}
    for key, row, failure_message in rows:
        try:
            additional_info[key] = table.find_element(
                "xpath", f"./tr[{row}]/td[2]/h4"
            ).text
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that Ctrl+C and
            # SystemExit still stop the scraper.
            additional_info[key] = None
            print(failure_message)

    print(json.dumps(additional_info, indent=4))
    return additional_info
|
||||
|
||||
#gets information on the main focus of the person and their special skills
|
||||
#saves them in a json format
|
||||
def _parse_width_percentage(style):
    """Return the integer percentage of the ``width`` declaration in ``style``.

    ``"width: 100%;"`` gives ``100``.  A fractional value is truncated.
    Raises ``ValueError`` when no numeric width is found.
    """
    declaration = style.partition("width:")[2]
    return int(float(declaration.partition("%")[0]))


def get_main_focus(driver, name):
    """Read the main-focus sections and the rated main skills of a person.

    Opens the master-profile page of ``name``.  From the ``pane-emphasis-de``
    container the three experience sections are read; each successfully read
    section is appended as ONE list of item texts to its (initially empty)
    result list.  From the ``mainSkills-de`` container up to three skills are
    read as ``{title: score}`` dicts, where the score is the percentage of
    the bar element's inline ``width`` style.

    The score used to be taken from the fixed slice ``style[7:9]``, which
    turned ``width: 100%`` into ``10`` and failed on one-digit widths; the
    width is now parsed up to the percent sign.

    Every part that cannot be read is reported on stdout and skipped.  The
    record is printed as indented JSON only when the skill container exists.

    Args:
        driver: Object offering ``get(url)`` and ``find_element(by, value)``;
            the script at the bottom of this file passes a Selenium Chrome
            driver.
        name: Username that is appended to the profile URL.

    Returns:
        dict with the keys ``consulting experience``,
        ``project management experience``, ``development experience`` and
        ``skills``.

    Raises:
        Whatever ``find_element`` raises when the ``pane-emphasis-de``
        container is missing.
    """
    driver.get(
        "https://confluence.ppi.de/masterprofiles/viewprofile.action?username="
        + name
    )

    experience = {
        "consulting experience": [],
        "project management experience": [],
        "development experience": [],
        "skills": [],
    }

    emphasis_container = driver.find_element("id", "pane-emphasis-de")

    # (result key, position of the section, message printed on failure)
    sections = (
        ("consulting experience", 1,
         "Consulting Experience could not be extracted"),
        ("project management experience", 2,
         "Project Management Experience could not be extracted"),
        ("development experience", 3,
         "Development Experience could not be extracted"),
    )
    for key, position, failure_message in sections:
        try:
            section = emphasis_container.find_element(
                "xpath", f"./div/div[{position}]"
            )
            items = get_content_as_list(section, "./div/div[2]", "li")
            experience[key].append(items)
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that Ctrl+C and
            # SystemExit still stop the scraper.
            print(failure_message)

    try:
        skill_container = driver.find_element("id", "mainSkills-de")
    except Exception:
        # This case used to be swallowed without any output.
        print("Main skills could not be extracted")
        return experience

    for number in (1, 2, 3):
        try:
            title = skill_container.find_element("xpath", f"./h3[{number}]")
            # The bar that belongs to heading n is looked up in div n + 1.
            style = skill_container.find_element(
                "xpath", f"./div[{number + 1}]/div"
            ).get_attribute("style")
            experience["skills"].append(
                {title.text: _parse_width_percentage(style)}
            )
        except Exception:
            print(f"Skill{number} could not be extracted")

    print(json.dumps(experience, indent=4))
    return experience
|
||||
|
||||
#gets information on the different skills
|
||||
#saves them in a json format
|
||||
def _collect_skill_groups(section, group_names):
    """Read one ``li`` list per group name from consecutive ``div`` children.

    The n-th name (counting from 1) is read from ``./div[n]/div[2]`` below
    ``section``.  Returns a dict mapping group name to its list of texts.
    """
    return {
        group: get_content_as_list(section, f"./div[{position}]/div[2]", "li")
        for position, group in enumerate(group_names, start=1)
    }


def get_skills(driver, name):
    """Read the technical, field and other skills of a person.

    Opens the master-profile page of ``name`` and reads three sections of the
    ``pane-skills-de`` container.  A section is only added when all of its
    groups could be read; otherwise a message is printed and the section
    stays an empty list.  After every successfully read section the record
    collected so far is printed as indented JSON.

    Args:
        driver: Object offering ``get(url)`` and ``find_element(by, value)``;
            the script at the bottom of this file passes a Selenium Chrome
            driver.
        name: Username that is appended to the profile URL.

    Returns:
        dict with the keys ``technical`` and ``field`` (each a list holding
        at most one dict of group name -> list of texts) and ``other`` (a
        list holding at most one list of texts).

    Raises:
        Whatever ``find_element`` raises when the ``pane-skills-de``
        container is missing.
    """
    driver.get(
        "https://confluence.ppi.de/masterprofiles/viewprofile.action?username="
        + name
    )

    skills = {"technical": [], "field": [], "other": []}

    skills_container = driver.find_element("id", "pane-skills-de")

    # Group names in the order in which their blocks are looked up.
    technical_groups = (
        "software development",
        "programming languages",
        "runtime environments",
        "operating systems",
        "database management systems",
        "communication and networks",
    )
    field_groups = (
        "industry and special skills",
        "consulting",
    )

    try:
        technical = skills_container.find_element("xpath", "./div/div[1]")
        skills["technical"].append(
            _collect_skill_groups(technical, technical_groups)
        )
        print(json.dumps(skills, indent=4))
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl+C and
        # SystemExit still stop the scraper.
        print("Technical Skills could not be extracted")

    try:
        field = skills_container.find_element("xpath", "./div/div[2]")
        skills["field"].append(_collect_skill_groups(field, field_groups))
        print(json.dumps(skills, indent=4))
    except Exception:
        print("Field Skills could not be extracted")

    try:
        other_skills = get_content_as_list(
            skills_container, "./div/div[3]", "li"
        )
        skills["other"].append(other_skills)
        print(json.dumps(skills, indent=4))
    except Exception:
        print("Other Skills could not be extracted")

    return skills
|
||||
|
||||
#gets information on the publications and certifications of each person
|
||||
#saves them in a json format
|
||||
def _split_title_and_date(title_and_date):
    """Split ``"<title><separator><date>"`` into ``(title, date)``.

    The date is taken as the last seven characters and the single character
    in front of it is dropped as separator.
    """
    return title_and_date[:-8], title_and_date[-7:]


def get_life_events(driver, name):
    """Read the certificates and publications of a person.

    Opens the master-profile page of ``name`` and collects every element with
    the class ``certificate`` or ``publication`` inside the
    ``pane-lifeEvents-de`` container.  The first ``div`` of an entry holds
    title and date, the second one the issuer or publisher.

    If reading one entry fails, a message is printed and the remaining
    entries of that kind are skipped; entries read before stay in the result.
    After each kind that was read completely, the record collected so far is
    printed as indented JSON.

    Args:
        driver: Object offering ``get(url)`` and ``find_element(by, value)``;
            the script at the bottom of this file passes a Selenium Chrome
            driver.
        name: Username that is appended to the profile URL.

    Returns:
        dict with the key ``certificates`` (list of dicts with ``name``,
        ``date``, ``issuer``) and the key ``publications`` (list of dicts
        with ``title``, ``date``, ``publisher``).

    Raises:
        Whatever ``find_element`` raises when the ``pane-lifeEvents-de``
        container is missing.
    """
    driver.get(
        "https://confluence.ppi.de/masterprofiles/viewprofile.action?username="
        + name
    )

    life_event_record = {
        "certificates": [],
        "publications": [],
    }
    life_events = driver.find_element("id", "pane-lifeEvents-de")

    try:
        for certificate in life_events.find_elements("class name", "certificate"):
            # A separate local name: the original reused ``name`` here and
            # thereby overwrote the function parameter.
            certificate_name, date = _split_title_and_date(
                certificate.find_element("xpath", "./div[1]").text
            )
            issuer = certificate.find_element("xpath", "./div[2]").text
            life_event_record["certificates"].append(
                {"name": certificate_name, "date": date, "issuer": issuer}
            )
        print(json.dumps(life_event_record, indent=4))
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl+C and
        # SystemExit still stop the scraper.
        print("Certificates could not be extracted")

    try:
        for publication in life_events.find_elements("class name", "publication"):
            title, date = _split_title_and_date(
                publication.find_element("xpath", "./div[1]").text
            )
            publisher = publication.find_element("xpath", "./div[2]").text
            life_event_record["publications"].append(
                {"title": title, "date": date, "publisher": publisher}
            )
        print(json.dumps(life_event_record, indent=4))
    except Exception:
        print("Publications could not be extracted")

    return life_event_record
|
||||
|
||||
#gets information on all the projects and the related tasks of each person
|
||||
#saves them in a json format
|
||||
def get_projects(driver, name):
    """Read all projects of a person together with their tasks.

    Opens the master-profile page of ``name`` and treats every ``table``
    inside the ``pane-projects-de`` container as one project.  The fields are
    read by CSS class name, the tasks as the ``li`` texts found below
    ``./tbody/tr/td[2]``.

    If reading one project fails, a message is printed and the remaining
    projects are skipped; projects read before stay in the result.  The
    record is printed as indented JSON only when all projects were read.

    Args:
        driver: Object offering ``get(url)`` and ``find_element(by, value)``;
            the script at the bottom of this file passes a Selenium Chrome
            driver.
        name: Username that is appended to the profile URL.

    Returns:
        dict with the single key ``project`` holding a list of dicts with the
        keys ``period``, ``title``, ``role``, ``client``, ``department``,
        ``description`` and ``tasks``.

    Raises:
        Whatever ``find_element`` raises when the ``pane-projects-de``
        container is missing.
    """
    driver.get(
        "https://confluence.ppi.de/masterprofiles/viewprofile.action?username="
        + name
    )

    project_table = driver.find_element("id", "pane-projects-de")
    projects = project_table.find_elements("tag name", "table")

    all_projects = {"project": []}

    # (result key, CSS class of the element that holds the value)
    text_fields = (
        ("period", "projectPeriod"),
        ("title", "projectTitle"),
        ("role", "projectRole"),
        ("client", "projectClient"),
        ("department", "projectDepartment"),
        ("description", "projectDescription"),
    )

    try:
        for project in projects:
            proj = {
                key: project.find_element("class name", css_class).text
                for key, css_class in text_fields
            }
            proj["tasks"] = get_content_as_list(
                project, "./tbody/tr/td[2]", "li"
            )
            all_projects["project"].append(proj)

        print(json.dumps(all_projects, indent=4))
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl+C and
        # SystemExit still stop the scraper.
        print("Projects could not be extracted")

    return all_projects
|
||||
|
||||
|
||||
#extracts content from a certain tag and saves it in a list
|
||||
def get_content_as_list(driver, relative_content_path, type):
    """Return the texts of all ``type`` elements below a located element.

    Args:
        driver: Object offering ``find_element(by, value)``.  The callers in
            this file pass page elements rather than the browser driver.
        relative_content_path: XPath, resolved via ``driver.find_element``,
            of the element whose descendants are collected.
        type: Tag name of the elements to collect, e.g. ``"li"``.  The name
            shadows the builtin but is kept because it is part of the
            signature.

    Returns:
        list with the ``text`` of every matching element, in the order in
        which ``find_elements`` returns them.
    """
    content_div = driver.find_element("xpath", relative_content_path)
    return [
        item.text for item in content_div.find_elements("tag name", type)
    ]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    with webdriver.Chrome() as browser:
        # Manual login: open the start page and block until the user
        # confirms on the console that the login is done.
        browser.get("https://confluence.ppi.de")
        getpass.getpass("Press Enter after You are done logging in")

        username = "lawi"

        # Only the skills are extracted here.  The other extractors in this
        # file take the same two arguments: get_persis_info,
        # get_additional_info, get_main_focus, get_life_events, get_projects.
        get_skills(browser, username)
|
||||
130
source/Confluence/json_to_df.py
Normal file
130
source/Confluence/json_to_df.py
Normal file
@ -0,0 +1,130 @@
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _explode_nested_lists(nested, label_column):
    """Flatten a nested dict of lists into one row per list item.

    ``nested`` is flattened with ``pd.json_normalize``; every resulting
    column becomes a value of ``label_column`` and every item of the list
    stored in it becomes one row of the ``skill`` column.
    """
    flattened = pd.json_normalize(nested)
    return (
        pd.DataFrame(flattened.items(), columns=[label_column, "skill"])
        # First explode unpacks the one-element column, the second one the
        # list stored inside it.
        .explode("skill")
        .explode("skill")
    )


def _profile_to_frames(data):
    """Build the per-person frames, keyed by output table name."""
    # ``{}`` as default: the former ``[]`` default has no ``.get`` and would
    # have failed with AttributeError for a profile without a "de" section.
    de = data.get("de", {})
    projects = de.get("projects", [])

    # Global information on the person.
    global_info = pd.json_normalize(data.get("global", []))
    global_info["graduation"] = de.get("graduation")
    global_info["career"] = de.get("career")
    global_info = global_info.drop(columns=["yearOfBirth"], errors="ignore")

    # Main skills and their scores: up to three title/quality column pairs
    # are stacked into the two columns skillName / skillLevel.
    main_skills_raw = pd.json_normalize(de.get("mainSkills", []))
    main_skill_frames = []
    for number in (1, 2, 3):
        title = f"mainSkill{number}Title"
        quality = f"mainSkill{number}Quality"
        try:
            selected = main_skills_raw[[title, quality]]
        except KeyError:
            # This main skill is not maintained in the profile.
            continue
        main_skill_frames.append(
            selected.rename(columns={title: "skillName", quality: "skillLevel"})
        )
    main_skills = (
        pd.concat(main_skill_frames) if main_skill_frames else pd.DataFrame()
    )

    return {
        "global_info": global_info,
        "languages": pd.DataFrame(de.get("languages", []), columns=["language"]),
        "main_skills": main_skills,
        "skills": _explode_nested_lists(de.get("skills", []), "skillType"),
        "projects": pd.json_normalize(projects).drop(
            columns=["favorite", "tasks"], errors="ignore"
        ),
        # One row per task, carrying the title of its project.
        "project_tasks": pd.json_normalize(
            projects, record_path=["tasks"], meta=["title"]
        ).rename(columns={0: "task"}),
        "emphasis": _explode_nested_lists(de.get("emphasis", []), "emphasis"),
        "publications": pd.json_normalize(de.get("publications", [])),
        "certificates": pd.json_normalize(de.get("certificates", [])),
    }


def convert_json_to_dataframe(
    directory=os.path.join("source", "Data"),
    output_directory=os.path.join("source", "Tables"),
) -> None:
    """Convert all profile JSON files of a directory into CSV tables.

    Every ``*.json`` file in ``directory`` is read as one person's profile;
    the file name without the suffix is stored in the ``short`` column of
    every row.  Nine tables are written to ``output_directory``:
    ``global_info``, ``languages``, ``main_skills``, ``skills``,
    ``projects``, ``project_tasks``, ``emphasis``, ``publications`` and
    ``certificates`` (each as ``<name>.csv``, including the index column).

    Changes compared with the first version:

    * Paths are built with ``os.path.join`` instead of backslash literals
      such as ``"source\\Data"``, which were invalid escape sequences and
      only formed a directory path on Windows.
    * Files are read as UTF-8 instead of the platform default encoding.
    * Missing sections of a profile yield empty tables instead of an error.
    * The frames are concatenated once at the end instead of once per file.
    * The output directory is created when it does not exist.

    Args:
        directory: Directory that holds the profile JSON files.
        output_directory: Directory the CSV files are written to.
    """
    frames_by_table = {
        "global_info": [],
        "languages": [],
        "main_skills": [],
        "skills": [],
        "projects": [],
        "project_tasks": [],
        "emphasis": [],
        "publications": [],
        "certificates": [],
    }

    for entry in os.scandir(directory):
        if not (entry.is_file() and entry.name.endswith(".json")):
            continue

        short = entry.name.removesuffix(".json")
        with open(entry.path, "r", encoding="utf-8") as f:
            data = json.load(f)

        for table_name, frame in _profile_to_frames(data).items():
            frame["short"] = short
            frames_by_table[table_name].append(frame)

    os.makedirs(output_directory, exist_ok=True)
    for table_name, frames in frames_by_table.items():
        # Reversed so that the rows keep the order of the first version,
        # which put each newly read file in front of the earlier ones.
        combined = pd.concat(frames[::-1]) if frames else pd.DataFrame()
        combined.to_csv(os.path.join(output_directory, f"{table_name}.csv"))
|
||||
109
source/Confluence/test.ipynb
Normal file
109
source/Confluence/test.ipynb
Normal file
@ -0,0 +1,109 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from selenium import webdriver\n",
|
||||
"import json\n",
|
||||
"import getpass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"name = 'lawi'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"job_description\": \"Senior Consultant (L2201)\",\n",
|
||||
" \"role\": \"Senior Consultant\",\n",
|
||||
" \"email\": \"Lasse.Wiedemann@crossnative.com\",\n",
|
||||
" \"phone number\": \"+4943188810-0\",\n",
|
||||
" \"location\": \"GS Hamburg\",\n",
|
||||
" \"business area\": \"GB PPI-X\",\n",
|
||||
" \"unit\": \"X-Consulting, sada\",\n",
|
||||
" \"team\": \"CX, sada, Team DnA olsc\",\n",
|
||||
" \"supervisor\": \"Ole Schmidt (olsc)\"\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"driver = webdriver.Chrome()\n",
|
||||
"driver.get(\"https://confluence.ppi.de\")\n",
|
||||
"getpass.getpass(\"Press Enter after You are done logging in\")\n",
|
||||
"driver.get(\"https://confluence.ppi.de/masterprofiles/viewprofile.action?username=\" + name)\n",
|
||||
"general = driver.find_element(\"id\", \"general-de\")\n",
|
||||
"table = general.find_element(\"tag name\", \"tbody\")\n",
|
||||
"job_description = general.find_element(\"xpath\", \"./p/span\").text \n",
|
||||
"role = general.find_element(\"xpath\", \"./p/i\").text\n",
|
||||
"email = table.find_element(\"xpath\", \"./tr[1]/td[2]/a/h4\").text\n",
|
||||
"phone_number = table.find_element(\"xpath\", \"./tr[2]/td[2]/a/h4\").text\n",
|
||||
"location = table.find_element(\"xpath\", \"./tr[4]/td[2]/h4\").text\n",
|
||||
"business_area = table.find_element(\"xpath\", \"./tr[5]/td[2]/h4\").text\n",
|
||||
"unit = table.find_element(\"xpath\", \"./tr[6]/td[2]/h4\").text\n",
|
||||
"team = table.find_element(\"xpath\", \"./tr[7]/td[2]/h4\").text\n",
|
||||
"supervisor = table.find_element(\"xpath\", \"./tr[8]/td[2]/a/h4\").text\n",
|
||||
"persis_info = {\n",
|
||||
" \"job_description\": job_description,\n",
|
||||
" \"role\": role,\n",
|
||||
" \"email\": email,\n",
|
||||
" \"phone number\": phone_number,\n",
|
||||
" \"location\": location,\n",
|
||||
" \"business area\": business_area,\n",
|
||||
" \"unit\": unit,\n",
|
||||
" \"team\": team,\n",
|
||||
" \"supervisor\": supervisor\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(json.dumps(persis_info, indent=4))\n",
|
||||
"\n",
|
||||
"driver.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user