import json
import os

import pandas as pd

# Names of the tables produced per person; each becomes "<name>.csv".
_TABLE_NAMES = (
    "global_info",
    "languages",
    "main_skills",
    "skills",
    "projects",
    "project_tasks",
    "emphasis",
    "publications",
    "certificates",
)


def _main_skills_frame(de: dict) -> pd.DataFrame:
    """Stack the mainSkill1..3 title/quality pairs into skillName/skillLevel rows."""
    raw = pd.json_normalize(de.get("mainSkills", []))
    parts = []
    for rank in (1, 2, 3):
        title = f"mainSkill{rank}Title"
        quality = f"mainSkill{rank}Quality"
        try:
            pair = raw[[title, quality]]
        except KeyError:
            # This main skill is not present for the person; skip it.
            continue
        parts.append(
            pair.rename({title: "skillName", quality: "skillLevel"}, axis=1)
        )
    return pd.concat(parts) if parts else pd.DataFrame()


def _exploded_pairs(section, key_column: str, value_column: str) -> pd.DataFrame:
    """Turn a normalized JSON section into one row per (column name, item)."""
    flat = pd.json_normalize(section)
    # ``flat.items()`` yields (column name, Series). The first explode unpacks
    # the Series into its cells, the second unpacks list-valued cells.
    return (
        pd.DataFrame(flat.items(), columns=[key_column, value_column])
        .explode(value_column)
        .explode(value_column)
    )


def _person_tables(data: dict, short: str) -> dict:
    """Build every per-person table from one parsed JSON document.

    Returns a dict keyed by the names in ``_TABLE_NAMES``; every frame gets a
    ``short`` column holding the person's identifier.
    """
    # Missing or null "de" section is treated as empty instead of crashing.
    de = data.get("de") or {}
    projects = de.get("projects", [])

    global_info = pd.json_normalize(data.get("global", []))
    global_info["graduation"] = de.get("graduation")
    global_info["career"] = de.get("career")
    global_info = global_info.drop(columns=["yearOfBirth"], errors="ignore")

    project_tasks = pd.json_normalize(
        projects,
        record_path=["tasks"],
        meta=["title"],
    )
    # Task records that are not dicts end up in a column labelled 0.
    project_tasks = project_tasks.rename({0: "task"}, axis=1)

    tables = {
        "global_info": global_info,
        "languages": pd.DataFrame(de.get("languages", []), columns=["language"]),
        "main_skills": _main_skills_frame(de),
        "skills": _exploded_pairs(de.get("skills", []), "skillType", "skill"),
        "projects": pd.json_normalize(projects).drop(
            columns=["favorite", "tasks"], errors="ignore"
        ),
        "project_tasks": project_tasks,
        "emphasis": _exploded_pairs(de.get("emphasis", []), "emphasis", "skill"),
        "publications": pd.json_normalize(de.get("publications", [])),
        "certificates": pd.json_normalize(de.get("certificates", [])),
    }
    for frame in tables.values():
        frame["short"] = short
    return tables


def convert_json_to_dataframe(
    directory: str = os.path.join("source", "Data"),
    output_directory: str = os.path.join("source", "Tables"),
) -> None:
    """Convert the per-person JSON files of a directory into CSV tables.

    Every regular file in ``directory`` whose name ends with ``.json`` is
    parsed (as UTF-8); the file name without the suffix is used as the
    person's ``short`` identifier. For each name in ``_TABLE_NAMES`` the
    per-person frames are concatenated and written to
    ``<output_directory>/<name>.csv`` (including the DataFrame index).

    Args:
        directory: Folder scanned (non-recursively) for ``*.json`` files.
        output_directory: Folder receiving the CSV files; created if missing.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
        KeyError: If a project entry lacks the ``tasks`` or ``title`` key
            required to build the project-task table.
    """
    collected = {name: [] for name in _TABLE_NAMES}

    with os.scandir(directory) as entries:
        for entry in entries:
            if not (entry.is_file() and entry.name.endswith(".json")):
                continue
            # JSON is UTF-8 by specification; do not rely on the locale.
            with open(entry.path, "r", encoding="utf-8") as f:
                data = json.load(f)
            short = entry.name.removesuffix(".json")
            for name, frame in _person_tables(data, short).items():
                collected[name].append(frame)

    os.makedirs(output_directory, exist_ok=True)
    for name, frames in collected.items():
        # Newest file first, matching the former prepend-on-every-file order,
        # but with a single concat instead of one per input file.
        table = pd.concat(frames[::-1]) if frames else pd.DataFrame()
        table.to_csv(os.path.join(output_directory, f"{name}.csv"))