diff --git a/datasheets/create_json_from_excel.py b/datasheets/create_json_from_excel.py new file mode 100644 index 0000000000000000000000000000000000000000..876bac1544be8c76ff79e8fd459703cf4b064751 --- /dev/null +++ b/datasheets/create_json_from_excel.py @@ -0,0 +1,136 @@ +"""Create json metadata files from an excel file.""" +import os +import sys +from collections import namedtuple +from typing import Dict, List, NamedTuple + +import pandas as pd + + +COLUMN_FOR_INDEX_IN_EXCEL = 0 +KEYWORD_TO_IGNORE_COLUMN = "ignore" +KEYWORD_TO_IGNORE_SHEET = "ignore" +HELP_TXT = """ + 1. The source excel file. (e.g. test.xlsx) + 2. The destination folder for the json file. (e.g. test_folder) + 3. Override the folder if existing. (OPTIONAL, False) + + Providing only '-- help' will show this text and exit. +""" + + +def read_terminal_arguments() -> NamedTuple: + """Read terminal arguments. + + 1. File to read from. + 2. Name of the folder to store the json files. + 3. Override the destination folder if existing, defaults False. + """ + terminal_args = namedtuple( + "terminal_args", + ["source", "destination", "override"], + defaults=["test.xlsx", "test", True] + ) + if len(sys.argv) == 1: # No arguments passed. + return terminal_args() # noqa + if sys.argv[1] in ["help", "--help", "-h", "?", "h"]: + print(HELP_TXT) + raise SystemExit + try: + arg_1 = sys.argv[1] + except IndexError: + raise SystemExit + try: + arg_2 = sys.argv[2] + except IndexError: + raise SystemExit + try: + arg_3 = sys.argv[3] + except IndexError: + arg_3 = False + + terminal_args = terminal_args( + source=arg_1, + destination=arg_2, + override=bool(arg_3) + ) + return terminal_args + + +def read_excel_file(path: str) -> Dict[str, pd.DataFrame]: + """Read the sheets of the Excel file into multiple dataframes.""" + if not os.path.exists(path): + raise FileNotFoundError( + f"No file found at '{path}'" + ) + sheets_as_dataframes = {} + with pd.ExcelFile(path) as xls: + for sheet_name in xls.sheet_names: + if sheet_name.startswith(KEYWORD_TO_IGNORE_SHEET): + continue + excel_file = pd.read_excel( + xls, + sheet_name, + index_col=COLUMN_FOR_INDEX_IN_EXCEL + ) + sheets_as_dataframes.update({sheet_name: excel_file}) + return sheets_as_dataframes + + +def make_json(dataframes: Dict[str, pd.DataFrame]) -> Dict[str, str]: + """Create the json strings from the excel data.""" + json_files = {} + for sheet_name, dataframe in dataframes.items(): + dataframe.dropna(inplace=True, axis=1) + dropped_columns = [column for column in dataframe.columns + if column.startswith(KEYWORD_TO_IGNORE_COLUMN)] + dataframe.drop(dropped_columns, axis=1, inplace=True, + errors="ignore") + print(sheet_name) + json_string = dataframe.to_json(orient="index", indent=4) + json_files.update({sheet_name: json_string}) + return json_files + + +def save_json_files(folder_name: str, jsons_files: Dict[str, str], + override_files: bool) -> None: + """Save the json strings to a '.json' file format.""" + path_to_json_folder = os.path.join( + os.path.abspath(__file__), + os.pardir, + folder_name + ) + path_to_json_folder = os.path.abspath(path_to_json_folder) + path_exists = os.path.exists(path_to_json_folder) + if path_exists and not override_files: + raise FileExistsError( + f"Folder: {path_to_json_folder} exists. Pass override argument " + f"to override the folder. See --help for more." + ) + if not path_exists: + os.mkdir(path_to_json_folder) + for json_name, json_sting in jsons_files.items(): + file_name = f"{json_name}.json" + file_path = os.path.join(path_to_json_folder, file_name) + with open(file_path, "w") as f: + f.write(json_sting) + + +def main(): + """The main function.""" + # Get terminal arguments. + terminal_arguments = read_terminal_arguments() + # Read the Excel file. + path_to_file = os.path.abspath(os.path.join( + os.path.abspath(__file__), + os.pardir, + terminal_arguments.source) + ) + excel_sheets_as_dataframes = read_excel_file(path_to_file) + excel_sheets_as_json = make_json(excel_sheets_as_dataframes) + save_json_files(terminal_arguments.destination, excel_sheets_as_json, + terminal_arguments.override) + + +if __name__ == "__main__": + t = main()