-
Hock, Benedikt authoredHock, Benedikt authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
create_json_from_excel.py 4.38 KiB
"""Create json metadata files from an excel file."""
import os
import sys
from collections import namedtuple
from typing import Dict, List, NamedTuple
import pandas as pd
# pandas is using another dependency called openpyxl
# both need to be installed
COLUMN_FOR_INDEX_IN_EXCEL = 0
KEYWORD_TO_IGNORE_COLUMN = "ignore"
KEYWORD_TO_IGNORE_SHEET = "ignore"
HELP_TXT = """
1. The source excel file. (e.g. test.xlsx)
2. The destination folder for the json file. (e.g. test_folder)
3. Override the folder if existing. (OPTIONAL, False)
Providing only '-- help' will show this text and exit.
"""
def read_terminal_arguments() -> NamedTuple:
"""Read terminal arguments.
1. File to read from.
2. Name of the folder to store the json files.
3. Override the destination folder if existing, defaults False.
"""
terminal_args = namedtuple(
"terminal_args",
["source", "destination", "override"],
defaults=["test.xlsx", "test", True]
)
if len(sys.argv) == 1: # No arguments passed.
return terminal_args() # noqa
if sys.argv[1] in ["help", "--help", "-h", "?", "h"]:
print(HELP_TXT)
raise SystemExit
try:
arg_1 = sys.argv[1]
except IndexError:
raise SystemExit
try:
arg_2 = sys.argv[2]
except IndexError:
raise SystemExit
try:
arg_3 = sys.argv[3]
except IndexError:
arg_3 = False
terminal_args = terminal_args(
source=arg_1,
destination=arg_2,
override=bool(arg_3)
)
return terminal_args
def read_excel_file(path: str) -> Dict[str, pd.DataFrame]:
"""Read the sheets of the Excel file into multiple dataframes."""
if not os.path.exists(path):
raise FileNotFoundError(
f"No file found at '{path}'"
)
sheets_as_dataframes = {}
with pd.ExcelFile(path) as xls:
for sheet_name in xls.sheet_names:
if sheet_name.startswith(KEYWORD_TO_IGNORE_SHEET):
continue
excel_file = pd.read_excel(
xls,
sheet_name,
index_col=COLUMN_FOR_INDEX_IN_EXCEL
)
sheets_as_dataframes.update({sheet_name: excel_file})
return sheets_as_dataframes
def make_json(dataframes: Dict[str, pd.DataFrame]) -> Dict[str, str]:
"""Create the json strings from the excel data."""
json_files = {}
for sheet_name, dataframe in dataframes.items():
dataframe.dropna(inplace=True, axis=1)
dropped_columns = [column for column in dataframe.columns
if column.startswith(KEYWORD_TO_IGNORE_COLUMN)]
dataframe.drop(dropped_columns, axis=1, inplace=True,
errors="ignore")
print(sheet_name)
json_string = dataframe.to_json(orient="index", indent=4, force_ascii=False)
json_files.update({sheet_name: json_string})
return json_files
def save_json_files(folder_name: str, jsons_files: Dict[str, str],
override_files: bool) -> None:
"""Save the json strings to a '.json' file format."""
path_to_json_folder = os.path.join(
os.path.abspath(__file__),
os.pardir,
folder_name
)
path_to_json_folder = os.path.abspath(path_to_json_folder)
path_exists = os.path.exists(path_to_json_folder)
if path_exists and not override_files:
raise FileExistsError(
f"Folder: {path_to_json_folder} exists. Pass override argument "
f"to override the folder. See --help for more."
)
if not path_exists:
os.mkdir(path_to_json_folder)
for json_name, json_sting in jsons_files.items():
file_name = f"{json_name}.json"
file_path = os.path.join(path_to_json_folder, file_name)
with open(file_path, "w") as f:
f.write(json_sting)
def main():
"""The main function."""
# Get terminal arguments.
terminal_arguments = read_terminal_arguments()
# Read the Excel file.
path_to_file = os.path.abspath(os.path.join(
os.path.abspath(__file__),
os.pardir,
terminal_arguments.source)
)
excel_sheets_as_dataframes = read_excel_file(path_to_file)
excel_sheets_as_json = make_json(excel_sheets_as_dataframes)
save_json_files(terminal_arguments.destination, excel_sheets_as_json,
terminal_arguments.override)
if __name__ == "__main__":
t = main()