Commit 2f351671 authored by Lennart Holzenkamp
Browse files

changes parameter cluster_count to max_cluster_count

parent efbdaf86
......@@ -9,6 +9,7 @@ import numpy as np
import functions as f
import cluster
from sklearn import cluster as skcluster
from sklearn_extra import cluster as cluster_extra
#
# PARAMETERS
......@@ -16,10 +17,9 @@ p_ocel_file = './data/ocel.jsonocel'
# Event-relation mode used when building the object profiles.
p_mode = 'existence' # all | existence
# Object type whose instances get clustered.
p_object_type = 'items' # object-type-name, options depend on the data
# Per-attribute weights.
p_attr_weights = {
    'producer': 2
} # attributes that are not given in the data are not used
# Key into the `algorithms` registry: kmeans | kmediods | spectral | agglomerative
p_clustering_mode = 'kmeans' # optional, default: kmeans
# np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0 and
# raises AttributeError there, while np.nan exists in every NumPy version.
p_cluster_count = np.nan # cluster-count (optional, default: np.nan which leads to automatic k determination)
# Upper bound passed as k_max to the automatic k determination. It must be at
# least 2 and smaller than the number of distinct objects; a value that cannot
# be converted to int (e.g. np.nan) falls back to 10.
p_max_cluster_count = 10
# Format of the OCEL input file and of the written per-cluster OCEL files.
p_ocel_file_type = 'json' # json|xml
# Image format of the stored OCDFG visualisations.
p_graph_file_type = 'svg' # svg|png
# END PARAMETERS
......@@ -34,7 +34,7 @@ print('p_object_type: "' + str(p_object_type) + '".')
# Echo the configured run parameters so the console log records the setup.
print('p_attr_weights:')
print(p_attr_weights)
print('p_clustering_mode: "' + str(p_clustering_mode) + '".')
print('p_cluster_count: "' + str(p_cluster_count) + '".')
# The label now names the variable that is actually printed; before, this
# line was labelled 'p_cluster_count' as well, which made the log ambiguous.
print('p_max_cluster_count: "' + str(p_max_cluster_count) + '".')
print('p_ocel_file_type: "' + str(p_ocel_file_type) + '".')
print('p_graph_file_type: "' + str(p_graph_file_type) + '".')
print('-------------------------------------------------------')
......@@ -46,6 +46,7 @@ assert p_mode in c.MODES, 'selected mode not possible. Use either ''all'' or ''e
# Stop early when the configured OCEL input file is missing
# (`exists` is presumably os.path.exists; its import is outside this view).
assert exists(p_ocel_file), 'file does not exists'
# Registry of the supported clustering estimators, each built with its
# default arguments and keyed by the accepted p_clustering_mode values.
algorithms = {
    'kmeans': skcluster.KMeans(),
    'kmediods': cluster_extra.KMedoids(),
    'spectral': skcluster.SpectralClustering(),
    'agglomerative': skcluster.AgglomerativeClustering()
}
# 'kmediods' is a misspelling of K-Medoids. It stays for backward
# compatibility; the correct spelling is accepted as an alias of the same
# estimator instance.
algorithms['kmedoids'] = algorithms['kmediods']
......@@ -78,11 +79,13 @@ index_to_oid_map = res['index']
# NOTE(review): this hunk is shown without +/- markers and without indentation,
# so what look like the pre-change statements (p_cluster_count, cluster_count
# checks, k_max derived from len(index_to_oid_map)) and the post-change
# statements (max_cluster_count) are interleaved. Read it as a diff, not as
# runnable code.
# Pick the estimator instance registered for the selected clustering mode.
algo = algorithms[p_clustering_mode]
try:
# Uses the configured cluster count directly if it converts to int.
cluster_count = int(p_cluster_count)
assert cluster_count >= 2, 'cluster_count needs to be at least 2'
assert cluster_count < len(index_to_oid_map), 'cluster_count needs to be less than the count of distinct objects in the ocel-data.'
# Converts the configured upper bound for k to int.
max_cluster_count = int(p_max_cluster_count)
# NOTE(review): a bare `except:` catches every exception, not only a failed
# int() conversion; consider narrowing it to (TypeError, ValueError).
except:
# Searches k up to floor(n / 2) + 1, n being the number of entries in index_to_oid_map.
cluster_count = cluster.determine_optimal_k(distance_matrix, algorithm=algo, k_max=math.floor(len(index_to_oid_map) / 2)+1)
# Fallback upper bound for k.
max_cluster_count = 10
# NOTE(review): both messages below still say 'cluster_count' although the
# checked value is max_cluster_count.
assert max_cluster_count >= 2, 'cluster_count needs to be at least 2'
assert max_cluster_count < len(index_to_oid_map), 'cluster_count needs to be less than the count of distinct objects in the ocel-data.'
# Let the project helper choose k, bounded above by max_cluster_count.
cluster_count = cluster.determine_optimal_k(distance_matrix, algorithm=algo, k_max=max_cluster_count)
# Configure the estimator with the chosen k, then cluster the distance matrix.
algo.set_params(n_clusters=cluster_count)
cluster_res = cluster.cluster_matrix(distance_matrix, algorithm=algo)
......@@ -95,8 +98,8 @@ print('-------------------------------------------------------')
# Clustering summary: a heading, then each caption on its own line followed
# by the value it describes, then the caption for the dataframe dump.
print(' OBJECT-CLUSTERING ')
summary_rows = (
    ('clustering-technique:', p_clustering_mode),
    ('given cluster-count:', p_cluster_count),
    ('max cluster count given:', max_cluster_count),
    ('cluster-count used:', cluster_count),
)
for caption, shown_value in summary_rows:
    print(caption)
    print(shown_value)
print('object-cluster-dataframe:')
......@@ -184,10 +187,19 @@ directory = os.path.dirname(p_ocel_file) + '/clustered_ocel_files'
# Create the output directory if needed. exist_ok=True replaces the separate
# os.path.exists() check and so avoids the check-then-create race.
os.makedirs(directory, exist_ok=True)
# Width used to right-align the running cluster number.
appendix_len = len(str(cluster_count))

# Write one OCEL file per cluster.
print('saving clustered ocel files...')
for ii in range(len(res)):
    # NOTE(review): rjust pads with spaces, so once cluster_count has two or
    # more digits the file names contain a space (e.g. 'cluster_ 1'); confirm
    # whether zero padding was intended.
    label = str(ii + 1).rjust(appendix_len)
    filename = directory + '/cluster_' + label + '.' + p_ocel_file_type + 'ocel'
    pm4py.write_ocel(res[ii], filename)
    print(label + '/' + str(cluster_count) + '"' + filename + '" stored.')

# Discover and store one object-centric directly-follows graph per cluster.
print('creating and storing ocdfgs...')
for ii in range(len(res)):
    label = str(ii + 1).rjust(appendix_len)
    filename = directory + '/cluster_' + label + '_graph.' + p_graph_file_type
    ocdfg = pm4py.discover_ocdfg(res[ii])
    pm4py.save_vis_ocdfg(ocdfg, filename)
    print(label + '/' + str(cluster_count) + '"' + filename + '" stored.')

# Elapsed time since start_ts, which is set outside this view.
duration = datetime.datetime.now() - start_ts
print('--------------------------')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment