Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Stanislav Yuliyanov
oc-dfg-clustering
Commits
2f351671
Commit
2f351671
authored
May 24, 2022
by
Lennart Holzenkamp
Browse files
changes parameter cluster_count to max_cluster_count
parent
efbdaf86
Changes
1
Hide whitespace changes
Inline
Side-by-side
code/main.py
View file @
2f351671
...
...
@@ -9,6 +9,7 @@ import numpy as np
import functions as f
import cluster
from sklearn import cluster as skcluster
from sklearn_extra import cluster as cluster_extra
#
# PARAMETERS
...
...
@@ -16,10 +17,9 @@ p_ocel_file = './data/ocel.jsonocel'
p_mode = 'existence' # all | existence
p_object_type = 'items' # object-type-name, options depend on the data
p_attr_weights = {'producer': 2} # attributes that are not given in the data are not used
p_clustering_mode = 'kmeans' # optional, default: kmeans
p_cluster_count = np.NaN # cluster-count (optional, default: np.NaN which leads to automatic k determination)
p_max_cluster_count = 10 # np.NaN # cluster-count (optional, default: np.NaN which leads to automatic k determination)
p_ocel_file_type = 'json' # json|xml
p_graph_file_type = 'svg' # svg|png
# END PARAMETERS
...
...
@@ -34,7 +34,7 @@ print('p_object_type: "' + str(p_object_type) + '".')
print('p_attr_weights:')
print(p_attr_weights)
print('p_clustering_mode: "' + str(p_clustering_mode) + '".')
print('p_cluster_count: "' + str(p_cluster_count) + '".')
print('p_cluster_count: "' + str(p_max_cluster_count) + '".')
print('p_ocel_file_type: "' + str(p_ocel_file_type) + '".')
print('p_graph_file_type: "' + str(p_graph_file_type) + '".')
print('-------------------------------------------------------')
...
...
@@ -46,6 +46,7 @@ assert p_mode in c.MODES, 'selected mode not possible. Use either ''all'' or ''e
assert exists(p_ocel_file), 'file does not exists'
algorithms = {
    'kmeans': skcluster.KMeans(),
    'kmediods': cluster_extra.KMedoids(),
    'spectral': skcluster.SpectralClustering(),
    'agglomerative': skcluster.AgglomerativeClustering()
}
...
...
@@ -78,11 +79,13 @@ index_to_oid_map = res['index']
algo = algorithms[p_clustering_mode]
try:
cluster_count = int(p_cluster_count)
assert cluster_count >= 2, 'cluster_count needs to be at least 2'
assert cluster_count < len(index_to_oid_map), 'cluster_count needs to be less than the count of distinct objects in the ocel-data.'
max_cluster_count = int(p_max_cluster_count)
except:
cluster_count = cluster.determine_optimal_k(distance_matrix, algorithm=algo, k_max=math.floor(len(index_to_oid_map) / 2)+1)
max_cluster_count = 10
assert max_cluster_count >= 2, 'cluster_count needs to be at least 2'
assert max_cluster_count < len(index_to_oid_map), 'cluster_count needs to be less than the count of distinct objects in the ocel-data.'
cluster_count = cluster.determine_optimal_k(distance_matrix, algorithm=algo, k_max=max_cluster_count)
algo.set_params(n_clusters=cluster_count)
cluster_res = cluster.cluster_matrix(distance_matrix, algorithm=algo)
...
...
@@ -95,8 +98,8 @@ print('-------------------------------------------------------')
print('OBJECT-CLUSTERING')
print('clustering-technique:')
print(p_clustering_mode)
print('given cluster-count:')
print(p_cluster_count)
print('max cluster count given:')
print(max_cluster_count)
print('cluster-count used:')
print(cluster_count)
print('object-cluster-dataframe:')
...
...
@@ -184,10 +187,19 @@ directory = os.path.dirname(p_ocel_file) + '/clustered_ocel_files'
if not os.path.exists(directory):
os.makedirs(directory)
appendix_len = len(str(cluster_count))
print('saving clustered ocel files...')
for ii in range(0, len(res)):
    filename = directory + '/cluster_' + str(ii+1).rjust(appendix_len) + '.' + p_ocel_file_type + 'ocel'
    pm4py.write_ocel(res[ii], filename)
    print(str(ii+1).rjust(appendix_len) + '/' + str(cluster_count) + '"' + filename + '" stored.')
print('creating and storing ocdfgs...')
for ii in range(0, len(res)):
    filename = directory + '/cluster_' + str(ii+1).rjust(appendix_len) + '_graph.' + p_graph_file_type
    ocdfg = pm4py.discover_ocdfg(res[ii])
    pm4py.save_vis_ocdfg(ocdfg, filename)
    print(str(ii+1).rjust(appendix_len) + '/' + str(cluster_count) + '"' + filename + '" stored.')
duration = datetime.datetime.now() - start_ts
print('--------------------------')
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment