Commit d2995568 authored by SimonGlomb
Browse files

Merge branch 'main' into flask

parents 519abbb4 45926e5a
......@@ -14,11 +14,11 @@ def hello_world():
@app.route("/run_code", methods=["GET"])
def run_code():
    """Invoke ``main.main()`` and answer with a short HTML paragraph."""
    # The return value of main.main() is not used; only the fixed reply is sent.
    main.main()
    reply_html = "<p>Back to Main Page</p>"
    return reply_html
@app.route("/set_params", methods=["POST"])
def set_params():
global params
ocel_file = request.form.get("ocel_file")
form = model.MyForm()
main.p_ocel_file = form.ocel_file.data
......@@ -27,7 +27,7 @@ def set_params():
main.p_object_type = form.object_type.data
main.p_clusteval_mode = form.clusteval_mode.data
main.p_clustering_mode = form.clustering_mode.data
main.p_cluster_count = form.cluster_count.data
main.p_max_cluster_count = form.max_cluster_count.data
main.p_ocel_file_type = form.ocel_file_type.data
main.p_graph_file_type = form.graph_file_type.data
if form.validate_on_submit():
......
......@@ -9,20 +9,23 @@ import numpy as np
import functions as f
import cluster
from sklearn import cluster as skcluster
from sklearn_extra import cluster as cluster_extra
from clusteval import clusteval
from scipy.spatial import distance
#
# PARAMETERS
p_ocel_file = './data/ocel.jsonocel'
p_mode = 'existence' # all | existence
p_object_type = 'packages' # object-type-name, options depend on the data
p_ocel_file = './data/ocel.jsonocel'
p_mode = 'existence' # all | existence
p_object_type = 'packages' # object-type-name, options depend on the data
p_attr_weights = {
} # attributes that are not given in the data are not used
p_clusteval_mode = 'silhouette'
p_clustering_mode = 'kmeans' # optional, default: kmeans
p_cluster_count = 3 # np.NaN # np.NaN # cluster-count (optional, default: np.NaN which leads to automatic k determination)
p_ocel_file_type = 'json' # json|xml
p_graph_file_type = 'svg' # svg|png
p_clustering_mode = 'kmeans' # optional, default: kmeans
p_max_cluster_count = np.NaN # np.NaN # cluster-count (optional, default: np.NaN which leads to automatic k determination)
p_ocel_file_type = 'json' # json|xml
p_graph_file_type = 'svg' # svg|png
# END PARAMETERS
#
......@@ -33,10 +36,11 @@ def main():
global p_attr_weights
global p_clusteval_mode
global p_clustering_mode
global p_cluster_count
global p_max_cluster_count
global p_ocel_file_type
global p_graph_file_type
print('Program startet...')
print('-------------------------------------------------------')
print(' SETTINGS ')
......@@ -45,8 +49,9 @@ def main():
print('p_object_type: "' + str(p_object_type) + '".')
print('p_attr_weights:')
print(p_attr_weights)
print('p_clusteval_mode: "' + str(p_clusteval_mode) + '".')
print('p_clustering_mode: "' + str(p_clustering_mode) + '".')
print('p_cluster_count: "' + str(p_cluster_count) + '".')
print('p_max_cluster_count: "' + str(p_max_cluster_count) + '".')
print('p_ocel_file_type: "' + str(p_ocel_file_type) + '".')
print('p_graph_file_type: "' + str(p_graph_file_type) + '".')
print('-------------------------------------------------------')
......@@ -58,6 +63,7 @@ def main():
assert exists(p_ocel_file), 'file does not exists'
algorithms = {
'kmeans': skcluster.KMeans(),
'kmediods': cluster_extra.KMedoids(),
'spectral': skcluster.SpectralClustering(),
'agglomerative': skcluster.AgglomerativeClustering()
}
......@@ -90,25 +96,33 @@ def main():
algo = algorithms[p_clustering_mode]
try:
cluster_count = int(p_cluster_count)
assert cluster_count >= 2, 'cluster_count needs to be at least 2'
assert cluster_count < len(index_to_oid_map), 'cluster_count needs to be less than the count of distinct objects in the ocel-data.'
max_cluster_count = int(p_max_cluster_count)
except:
cluster_count = cluster.determine_optimal_k(distance_matrix, algorithm=algo, k_max=math.floor(len(index_to_oid_map) / 2)+1)
max_cluster_count = 0
# assert max_cluster_count >= 2, 'cluster_count needs to be at least 2'
# max_cluster_count may now also be 0; in that case the algorithm decides by default how many clusters need to be computed.
assert max_cluster_count < len(index_to_oid_map), 'cluster_count needs to be less than the count of distinct objects in the ocel-data.'
# cluster_count = cluster.determine_optimal_k(distance_matrix, algorithm=algo, k_max=max_cluster_count)
if (max_cluster_count == 0):
results = clusteval(evaluate = p_clusteval_mode).fit(distance_matrix)
else:
results = clusteval(evaluate = p_clusteval_mode, max_clust=max_cluster_count).fit(distance_matrix)
cluster_count = results['score']['clusters'].iloc[np.where(results['score']['score'] == results['score']['score'].max())[0][0]]
algo.set_params(n_clusters=cluster_count)
cluster_res = cluster.cluster_matrix(distance_matrix, algorithm=algo)
df_clusters = pd.DataFrame({'cluster': cluster_res[1]}, index=index_to_oid_map) # creating dataframe
df_clusters.index.name='ocel:oid' # setting index name for joining
duration = datetime.datetime.now() - start_ts
print('-------------------------------------------------------')
print(' OBJECT-CLUSTERING ')
print('clustering-technique:')
print(p_clustering_mode)
print('given cluster-count:')
print(p_cluster_count)
# print('max cluster count given:')
# print(max_cluster_count)
print('cluster-count used:')
print(cluster_count)
print('object-cluster-dataframe:')
......@@ -196,9 +210,18 @@ def main():
if not os.path.exists(directory):
os.makedirs(directory)
appendix_len = len(str(cluster_count))
print('saving clustered ocel files...')
for ii in range(0, len(res)):
filename = directory + '/cluster_' + str(ii+1).rjust(appendix_len) + '.' + p_ocel_file_type + 'ocel'
pm4py.write_ocel(ocel, filename)
pm4py.write_ocel(res[ii], filename)
print(str(ii+1).rjust(appendix_len) + '/' + str(cluster_count) + '"' + filename + '" stored.')
print('creating and storing ocdfgs...')
for ii in range(0, len(res)):
filename = directory + '/cluster_' + str(ii+1).rjust(appendix_len) + '_graph.' + p_graph_file_type
ocdfg = pm4py.discover_ocdfg(res[ii])
pm4py.save_vis_ocdfg(ocdfg, filename)
print(str(ii+1).rjust(appendix_len) + '/' + str(cluster_count) + '"' + filename + '" stored.')
duration = datetime.datetime.now() - start_ts
......@@ -206,6 +229,5 @@ def main():
print('duration: ' + str(duration))
print('-------------------------------------------------------')
if __name__ =="__main__":
if __name__=="__main__":
main()
\ No newline at end of file
......@@ -9,6 +9,6 @@ class MyForm(FlaskForm):
attr_weights = StringField('attr_weights', validators=[DataRequired()])
clusteval_mode = StringField('clusteval_mode')
clustering_mode = StringField('clustering_mode')
cluster_count = StringField('cluster_count')
max_cluster_count = StringField('max_cluster_count')
ocel_file_type = StringField('ocel_file_type')
graph_file_type = StringField('graph_file_type')
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment