131 lines
5.4 KiB
Python
131 lines
5.4 KiB
Python
import json
|
|
import os
|
|
import pandas as pd
|
|
|
|
|
|
def _explode_mapping(section, key_column: str) -> pd.DataFrame:
    """Flatten a ``{name: [items, ...]}`` section into one row per item.

    The result has the columns ``key_column`` and ``"skill"``.
    """
    normalized = pd.json_normalize(section)
    # ``items()`` yields (column name, Series) pairs and each Series holds the
    # original list in its only row, so two explodes are needed: the first one
    # unwraps the Series, the second one unwraps the list inside it.
    return (
        pd.DataFrame(normalized.items(), columns=[key_column, "skill"])
        .explode("skill")
        .explode("skill")
    )


def _main_skills_frame(main_skills) -> pd.DataFrame:
    """Stack the up to three ``mainSkillN`` title/quality pairs into rows.

    A pair is skipped when either its title or its quality key is missing.
    """
    normalized = pd.json_normalize(main_skills)
    frames = []
    for number in (1, 2, 3):
        title = f"mainSkill{number}Title"
        quality = f"mainSkill{number}Quality"
        if title in normalized.columns and quality in normalized.columns:
            frames.append(
                normalized[[title, quality]].rename(
                    columns={title: "skillName", quality: "skillLevel"}
                )
            )
    return pd.concat(frames) if frames else pd.DataFrame()


def convert_json_to_dataframe(
    directory: str = os.path.join("source", "Data"),
    output_directory: str = os.path.join("source", "Tables"),
) -> None:
    """Convert every ``*.json`` profile in ``directory`` into CSV tables.

    Each JSON file is read as UTF-8. The file name without ``.json`` is stored
    in a ``short`` column of every table so rows can be traced back to their
    source file. Sections missing from a file (including the whole ``"de"``
    section) contribute no rows instead of aborting the run.

    Args:
        directory: Folder that is scanned (non-recursively) for ``.json``
            files.
        output_directory: Folder the CSV files are written to; it is created
            when it does not exist yet.

    The following files are written: ``global_info.csv``, ``languages.csv``,
    ``main_skills.csv``, ``skills.csv``, ``projects.csv``,
    ``project_tasks.csv``, ``emphasis.csv``, ``publications.csv`` and
    ``certificates.csv``.
    """
    # One list of per-file frames per output table; concatenated once at the
    # end instead of re-copying the growing table for every file.
    tables = {
        "global_info": [],
        "languages": [],
        "main_skills": [],
        "skills": [],
        "projects": [],
        "project_tasks": [],
        "emphasis": [],
        "publications": [],
        "certificates": [],
    }

    with os.scandir(directory) as entries:
        for entry in entries:
            if not (entry.is_file() and entry.name.endswith(".json")):
                continue

            short = entry.name.removesuffix(".json")
            with open(entry.path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Fall back to an empty mapping so the ``.get`` calls below work
            # even when the section is absent.
            de = data.get("de") or {}
            projects = de.get("projects", [])

            # global information on a person
            global_info = pd.json_normalize(data.get("global", []))
            global_info["graduation"] = de.get("graduation")
            global_info["career"] = de.get("career")
            global_info = global_info.drop(
                columns=["yearOfBirth"], errors="ignore"
            )

            # project tasks: one row per task, tagged with the project title
            project_tasks = pd.json_normalize(
                projects,
                record_path=["tasks"],
                meta=["title"],
            ).rename(columns={0: "task"})

            person_frames = {
                "global_info": global_info,
                # languages a person speaks
                "languages": pd.DataFrame(
                    de.get("languages", []), columns=["language"]
                ),
                # main skills and their scores
                "main_skills": _main_skills_frame(de.get("mainSkills", [])),
                # all skills by skill type
                "skills": _explode_mapping(de.get("skills", []), "skillType"),
                # projects without the per-project helper columns
                "projects": pd.json_normalize(projects).drop(
                    columns=["favorite", "tasks"], errors="ignore"
                ),
                "project_tasks": project_tasks,
                # all entries per main emphasis
                "emphasis": _explode_mapping(
                    de.get("emphasis", []), "emphasis"
                ),
                # publications / certificates; empty when there are none
                "publications": pd.json_normalize(de.get("publications", [])),
                "certificates": pd.json_normalize(de.get("certificates", [])),
            }

            for name, frame in person_frames.items():
                frame["short"] = short
                tables[name].append(frame)

    os.makedirs(output_directory, exist_ok=True)
    for name, frames in tables.items():
        # Most recently read file first, which is the row order the previous
        # prepend-in-the-loop implementation produced.
        combined = pd.concat(frames[::-1]) if frames else pd.DataFrame()
        combined.to_csv(os.path.join(output_directory, f"{name}.csv"))
|