Newer
Older
"""Create json metadata files from an excel file."""
import os
import sys
from collections import namedtuple
from typing import Dict, List, NamedTuple
import pandas as pd
# pandas is using another dependency called openpyxl
# both need to be installed
COLUMN_FOR_INDEX_IN_EXCEL = 0
KEYWORD_TO_IGNORE_COLUMN = "ignore"
KEYWORD_TO_IGNORE_SHEET = "ignore"
HELP_TXT = """
1. The source excel file. (e.g. test.xlsx)
2. The destination folder for the json file. (e.g. test_folder)
3. Override the folder if existing. (OPTIONAL, False)
Providing only '-- help' will show this text and exit.
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
def read_terminal_arguments() -> NamedTuple:
"""Read terminal arguments.
1. File to read from.
2. Name of the folder to store the json files.
3. Override the destination folder if existing, defaults False.
"""
terminal_args = namedtuple(
"terminal_args",
["source", "destination", "override"],
defaults=["test.xlsx", "test", True]
)
if len(sys.argv) == 1: # No arguments passed.
return terminal_args() # noqa
if sys.argv[1] in ["help", "--help", "-h", "?", "h"]:
print(HELP_TXT)
raise SystemExit
try:
arg_1 = sys.argv[1]
except IndexError:
raise SystemExit
try:
arg_2 = sys.argv[2]
except IndexError:
raise SystemExit
try:
arg_3 = sys.argv[3]
except IndexError:
arg_3 = False
terminal_args = terminal_args(
source=arg_1,
destination=arg_2,
override=bool(arg_3)
)
return terminal_args
def read_excel_file(path: str) -> Dict[str, pd.DataFrame]:
"""Read the sheets of the Excel file into multiple dataframes."""
if not os.path.exists(path):
raise FileNotFoundError(
f"No file found at '{path}'"
)
sheets_as_dataframes = {}
with pd.ExcelFile(path) as xls:
for sheet_name in xls.sheet_names:
if sheet_name.startswith(KEYWORD_TO_IGNORE_SHEET):
continue
excel_file = pd.read_excel(
xls,
sheet_name,
index_col=COLUMN_FOR_INDEX_IN_EXCEL
)
sheets_as_dataframes.update({sheet_name: excel_file})
return sheets_as_dataframes
def make_json(dataframes: Dict[str, pd.DataFrame]) -> Dict[str, str]:
"""Create the json strings from the excel data."""
json_files = {}
for sheet_name, dataframe in dataframes.items():
dataframe.dropna(inplace=True, axis=1)
dropped_columns = [column for column in dataframe.columns
if column.startswith(KEYWORD_TO_IGNORE_COLUMN)]
dataframe.drop(dropped_columns, axis=1, inplace=True,
errors="ignore")
print(sheet_name)
json_string = dataframe.to_json(orient="index", indent=4, force_ascii=False)
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
json_files.update({sheet_name: json_string})
return json_files
def save_json_files(folder_name: str, jsons_files: Dict[str, str],
override_files: bool) -> None:
"""Save the json strings to a '.json' file format."""
path_to_json_folder = os.path.join(
os.path.abspath(__file__),
os.pardir,
folder_name
)
path_to_json_folder = os.path.abspath(path_to_json_folder)
path_exists = os.path.exists(path_to_json_folder)
if path_exists and not override_files:
raise FileExistsError(
f"Folder: {path_to_json_folder} exists. Pass override argument "
f"to override the folder. See --help for more."
)
if not path_exists:
os.mkdir(path_to_json_folder)
for json_name, json_sting in jsons_files.items():
file_name = f"{json_name}.json"
file_path = os.path.join(path_to_json_folder, file_name)
with open(file_path, "w") as f:
f.write(json_sting)
def main():
"""The main function."""
# Get terminal arguments.
terminal_arguments = read_terminal_arguments()
# Read the Excel file.
path_to_file = os.path.abspath(os.path.join(
os.path.abspath(__file__),
os.pardir,
terminal_arguments.source)
)
excel_sheets_as_dataframes = read_excel_file(path_to_file)
excel_sheets_as_json = make_json(excel_sheets_as_dataframes)
save_json_files(terminal_arguments.destination, excel_sheets_as_json,
terminal_arguments.override)
if __name__ == "__main__":
t = main()