"""Create json metadata files from an excel file.""" import os import sys from collections import namedtuple from typing import Dict, List, NamedTuple import pandas as pd # pandas is using another dependency called openpyxl # both need to be installed COLUMN_FOR_INDEX_IN_EXCEL = 0 KEYWORD_TO_IGNORE_COLUMN = "ignore" KEYWORD_TO_IGNORE_SHEET = "ignore" HELP_TXT = """ 1. The source excel file. (e.g. test.xlsx) 2. The destination folder for the json file. (e.g. test_folder) 3. Override the folder if existing. (OPTIONAL, False) Providing only '-- help' will show this text and exit. """ def read_terminal_arguments() -> NamedTuple: """Read terminal arguments. 1. File to read from. 2. Name of the folder to store the json files. 3. Override the destination folder if existing, defaults False. """ terminal_args = namedtuple( "terminal_args", ["source", "destination", "override"], defaults=["test.xlsx", "test", True] ) if len(sys.argv) == 1: # No arguments passed. return terminal_args() # noqa if sys.argv[1] in ["help", "--help", "-h", "?", "h"]: print(HELP_TXT) raise SystemExit try: arg_1 = sys.argv[1] except IndexError: raise SystemExit try: arg_2 = sys.argv[2] except IndexError: raise SystemExit try: arg_3 = sys.argv[3] except IndexError: arg_3 = False terminal_args = terminal_args( source=arg_1, destination=arg_2, override=bool(arg_3) ) return terminal_args def read_excel_file(path: str) -> Dict[str, pd.DataFrame]: """Read the sheets of the Excel file into multiple dataframes.""" if not os.path.exists(path): raise FileNotFoundError( f"No file found at '{path}'" ) sheets_as_dataframes = {} with pd.ExcelFile(path) as xls: for sheet_name in xls.sheet_names: if sheet_name.startswith(KEYWORD_TO_IGNORE_SHEET): continue excel_file = pd.read_excel( xls, sheet_name, index_col=COLUMN_FOR_INDEX_IN_EXCEL ) sheets_as_dataframes.update({sheet_name: excel_file}) return sheets_as_dataframes def make_json(dataframes: Dict[str, pd.DataFrame]) -> Dict[str, str]: """Create the json strings from the excel data.""" json_files = {} for sheet_name, dataframe in dataframes.items(): dataframe.dropna(inplace=True, axis=1) dropped_columns = [column for column in dataframe.columns if column.startswith(KEYWORD_TO_IGNORE_COLUMN)] dataframe.drop(dropped_columns, axis=1, inplace=True, errors="ignore") print(sheet_name) json_string = dataframe.to_json(orient="index", indent=4, force_ascii=False) json_files.update({sheet_name: json_string}) return json_files def save_json_files(folder_name: str, jsons_files: Dict[str, str], override_files: bool) -> None: """Save the json strings to a '.json' file format.""" path_to_json_folder = os.path.join( os.path.abspath(__file__), os.pardir, folder_name ) path_to_json_folder = os.path.abspath(path_to_json_folder) path_exists = os.path.exists(path_to_json_folder) if path_exists and not override_files: raise FileExistsError( f"Folder: {path_to_json_folder} exists. Pass override argument " f"to override the folder. See --help for more." ) if not path_exists: os.mkdir(path_to_json_folder) for json_name, json_sting in jsons_files.items(): file_name = f"{json_name}.json" file_path = os.path.join(path_to_json_folder, file_name) with open(file_path, "w") as f: f.write(json_sting) def main(): """The main function.""" # Get terminal arguments. terminal_arguments = read_terminal_arguments() # Read the Excel file. path_to_file = os.path.abspath(os.path.join( os.path.abspath(__file__), os.pardir, terminal_arguments.source) ) excel_sheets_as_dataframes = read_excel_file(path_to_file) excel_sheets_as_json = make_json(excel_sheets_as_dataframes) save_json_files(terminal_arguments.destination, excel_sheets_as_json, terminal_arguments.override) if __name__ == "__main__": t = main()