Skip to content
Snippets Groups Projects
Commit dc9439a3 authored by maariuzz's avatar maariuzz
Browse files

add acript to create json files

parent 4a5fba1c
No related branches found
No related tags found
1 merge request!3Merge to main to create "WS2223" Version
"""Create json metadata files from an excel file."""
import os
import sys
from collections import namedtuple
from typing import Dict, List, NamedTuple
import pandas as pd
COLUMN_FOR_INDEX_IN_EXCEL = 0
KEYWORD_TO_IGNORE_COLUMN = "ignore"
KEYWORD_TO_IGNORE_SHEET = "ignore"
HELP_TXT = """
1. The source excel file. (e.g. test.xlsx)
2. The destination folder for the json file. (e.g. test_folder)
3. Override the folder if existing. (OPTIONAL, False)
Providing only '-- help' will show this text and exit.
"""
def read_terminal_arguments() -> NamedTuple:
"""Read terminal arguments.
1. File to read from.
2. Name of the folder to store the json files.
3. Override the destination folder if existing, defaults False.
"""
terminal_args = namedtuple(
"terminal_args",
["source", "destination", "override"],
defaults=["test.xlsx", "test", True]
)
if len(sys.argv) == 1: # No arguments passed.
return terminal_args() # noqa
if sys.argv[1] in ["help", "--help", "-h", "?", "h"]:
print(HELP_TXT)
raise SystemExit
try:
arg_1 = sys.argv[1]
except IndexError:
raise SystemExit
try:
arg_2 = sys.argv[2]
except IndexError:
raise SystemExit
try:
arg_3 = sys.argv[3]
except IndexError:
arg_3 = False
terminal_args = terminal_args(
source=arg_1,
destination=arg_2,
override=bool(arg_3)
)
return terminal_args
def read_excel_file(path: str) -> Dict[str, pd.DataFrame]:
"""Read the sheets of the Excel file into multiple dataframes."""
if not os.path.exists(path):
raise FileNotFoundError(
f"No file found at '{path}'"
)
sheets_as_dataframes = {}
with pd.ExcelFile(path) as xls:
for sheet_name in xls.sheet_names:
if sheet_name.startswith(KEYWORD_TO_IGNORE_SHEET):
continue
excel_file = pd.read_excel(
xls,
sheet_name,
index_col=COLUMN_FOR_INDEX_IN_EXCEL
)
sheets_as_dataframes.update({sheet_name: excel_file})
return sheets_as_dataframes
def make_json(dataframes: Dict[str, pd.DataFrame]) -> Dict[str, str]:
"""Create the json strings from the excel data."""
json_files = {}
for sheet_name, dataframe in dataframes.items():
dataframe.dropna(inplace=True, axis=1)
dropped_columns = [column for column in dataframe.columns
if column.startswith(KEYWORD_TO_IGNORE_COLUMN)]
dataframe.drop(dropped_columns, axis=1, inplace=True,
errors="ignore")
print(sheet_name)
json_string = dataframe.to_json(orient="index", indent=4)
json_files.update({sheet_name: json_string})
return json_files
def save_json_files(folder_name: str, jsons_files: Dict[str, str],
override_files: bool) -> None:
"""Save the json strings to a '.json' file format."""
path_to_json_folder = os.path.join(
os.path.abspath(__file__),
os.pardir,
folder_name
)
path_to_json_folder = os.path.abspath(path_to_json_folder)
path_exists = os.path.exists(path_to_json_folder)
if path_exists and not override_files:
raise FileExistsError(
f"Folder: {path_to_json_folder} exists. Pass override argument "
f"to override the folder. See --help for more."
)
if not path_exists:
os.mkdir(path_to_json_folder)
for json_name, json_sting in jsons_files.items():
file_name = f"{json_name}.json"
file_path = os.path.join(path_to_json_folder, file_name)
with open(file_path, "w") as f:
f.write(json_sting)
def main():
"""The main function."""
# Get terminal arguments.
terminal_arguments = read_terminal_arguments()
# Read the Excel file.
path_to_file = os.path.abspath(os.path.join(
os.path.abspath(__file__),
os.pardir,
terminal_arguments.source)
)
excel_sheets_as_dataframes = read_excel_file(path_to_file)
excel_sheets_as_json = make_json(excel_sheets_as_dataframes)
save_json_files(terminal_arguments.destination, excel_sheets_as_json,
terminal_arguments.override)
if __name__ == "__main__":
t = main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment