Commit 1bcf007d authored by dinhan93

Merge remote-tracking branch 'origin/ML_clustering' into ML_clustering

parents 3b27ae1f 28d3311b
Pipeline #90592 failed with stages
in 59 seconds
package de.monticore.lang.monticar.generator.middleware.clustering;
import de.monticore.lang.monticar.generator.middleware.clustering.algorithms.MarkovClusteringAlgorithm;
import de.monticore.lang.monticar.generator.middleware.clustering.algorithms.SpectralClusteringAlgorithm;
import de.se_rwth.commons.logging.Log;
......@@ -12,6 +13,7 @@ public class ClusteringAlgorithmFactory {
public static ClusteringAlgorithm getFromKind(ClusteringKind kind){
switch (kind){
case SPECTRAL_CLUSTERER: return new SpectralClusteringAlgorithm();
case MARKOV_CLUSTERER: return new MarkovClusteringAlgorithm();
default: Log.error("0x1D54C: No clustering algorithm found for ClusteringKind " + kind);
}
return null;
......
......@@ -2,4 +2,5 @@ package de.monticore.lang.monticar.generator.middleware.clustering;
/**
 * Identifies the clustering algorithm to instantiate.
 * ClusteringAlgorithmFactory.getFromKind maps each constant to its implementation.
 */
public enum ClusteringKind {
// resolved to SpectralClusteringAlgorithm by the factory
SPECTRAL_CLUSTERER,
// resolved to MarkovClusteringAlgorithm by the factory
MARKOV_CLUSTERER
}
package de.monticore.lang.monticar.generator.middleware.clustering.algorithms;
import de.monticore.lang.embeddedmontiarc.embeddedmontiarc._symboltable.ExpandedComponentInstanceSymbol;
import de.monticore.lang.monticar.generator.middleware.clustering.AutomaticClusteringHelper;
import de.monticore.lang.monticar.generator.middleware.clustering.ClusteringAlgorithm;
import de.monticore.lang.monticar.generator.middleware.helpers.ComponentHelper;
import de.se_rwth.commons.logging.Log;
import net.sf.javaml.clustering.mcl.SparseMatrix;
import net.sf.javaml.core.Dataset;
import net.sf.javaml.core.DefaultDataset;
import net.sf.javaml.core.DenseInstance;
import net.sf.javaml.core.Instance;
import java.util.*;
/**
 * Clusters the subcomponents of a component with the Markov cluster (MCL) implementation
 * shipped with Java-ML.
 *
 * <p>Optional tuning parameters are handed to {@link #cluster} as key-value pairs in the varargs
 * array: a {@link MarkovClusteringBuilder.MarkovParameters} key immediately followed by its
 * numeric value.
 */
public class MarkovClusteringAlgorithm implements ClusteringAlgorithm {

    /** Minimum diagonal entry for a node to be treated as an attractor, i.e. the seed of a cluster. */
    private static final double ATTRACTOR_THRESHOLD = 0.98D;

    /**
     * Reads the clusters out of the matrix produced by the MCL run.
     *
     * <p>Every index {@code i} whose diagonal entry reaches {@link #ATTRACTOR_THRESHOLD} yields one
     * cluster; its members are all instances {@code j} with a non-zero entry at {@code (j, i)}.
     *
     * @param data   one instance per node, in node order
     * @param matrix result matrix of the MCL run
     * @return one dataset per detected cluster, in attractor order (possibly empty)
     */
    private static Dataset[] getClustering(Dataset data, SparseMatrix matrix) {
        int size = matrix.getSize()[0];
        List<Dataset> clusters = new ArrayList<>();
        for (int i = 0; i < size; ++i) {
            if (matrix.get(i, i) >= ATTRACTOR_THRESHOLD) {
                Dataset cluster = new DefaultDataset();
                for (int j = 0; j < size; ++j) {
                    if (matrix.get(j, i) != 0.0D) {
                        cluster.add(data.instance(j));
                    }
                }
                clusters.add(cluster);
            }
        }
        return clusters.toArray(new Dataset[0]);
    }

    /**
     * Extracts the key-value pairs from the varargs array.
     *
     * <p>Pairs are read at positions (0,1), (2,3), ...; a pair whose first element is not a
     * {@link MarkovClusteringBuilder.MarkovParameters} constant is skipped, as is a trailing key
     * without a value. Only numeric values are accepted, so a parameter with an unusable value
     * counts as "not set".
     *
     * @param args varargs array as received by {@link #cluster}; may be {@code null} or empty
     * @return accepted parameters; a later pair overrides an earlier one with the same key
     */
    private static Map<MarkovClusteringBuilder.MarkovParameters, Double> parseParameters(Object[] args) {
        Map<MarkovClusteringBuilder.MarkovParameters, Double> parsed =
                new EnumMap<>(MarkovClusteringBuilder.MarkovParameters.class);
        if (args == null) {
            return parsed;
        }
        for (int v = 0; v + 1 < args.length; v += 2) {
            if (!(args[v] instanceof MarkovClusteringBuilder.MarkovParameters)) {
                continue;
            }
            MarkovClusteringBuilder.MarkovParameters key = (MarkovClusteringBuilder.MarkovParameters) args[v];
            Object value = args[v + 1];
            if (value instanceof Number) {
                // accept any numeric type so that e.g. an Integer is not silently dropped
                parsed.put(key, ((Number) value).doubleValue());
            } else {
                Log.warn("MarkovClusteringAlgorithm: Ignoring non-numeric value for parameter " + key);
            }
        }
        return parsed;
    }

    /**
     * Clusters the subcomponents of the given component.
     *
     * @param component component whose subcomponents are clustered
     * @param args      optional key-value pairs: a {@link MarkovClusteringBuilder.MarkovParameters}
     *                  key followed by its numeric value
     * @return the clusters; every subcomponent is contained in exactly one of them. The list is
     *         empty when a mandatory parameter is missing (an error is logged) or when there is
     *         nothing to cluster.
     */
    @Override
    public List<Set<ExpandedComponentInstanceSymbol>> cluster(ExpandedComponentInstanceSymbol component, Object... args) {
        List<Set<ExpandedComponentInstanceSymbol>> res = new ArrayList<>();

        Map<MarkovClusteringBuilder.MarkovParameters, Double> params = parseParameters(args);

        // are all mandatory params set?
        Set<MarkovClusteringBuilder.MarkovParameters> missing =
                EnumSet.noneOf(MarkovClusteringBuilder.MarkovParameters.class);
        for (MarkovClusteringBuilder.MarkovParameters param : MarkovClusteringBuilder.MarkovParameters.values()) {
            if (param.isMandatory() && !params.containsKey(param)) {
                missing.add(param);
            }
        }
        if (!missing.isEmpty()) {
            Log.error("MarkovClusteringAlgorithm: Mandatory parameter(s) missing!");
            return res;
        }

        List<ExpandedComponentInstanceSymbol> subcompsOrderedByName = ComponentHelper.getSubcompsOrderedByName(component);
        Map<String, Integer> labelsForSubcomps = ComponentHelper.getLabelsForSubcomps(subcompsOrderedByName);
        double[][] adjMatrix = AutomaticClusteringHelper.createAdjacencyMatrix(subcompsOrderedByName,
                ComponentHelper.getInnerConnectors(component),
                labelsForSubcomps);
        if (adjMatrix.length == 0) {
            // no subcomponents, hence nothing to cluster
            return res;
        }

        // |nodes| instances of data with one attribute denoting the node order
        Dataset originalDs = new DefaultDataset();
        for (int i = 0; i < adjMatrix[0].length; i++) {
            originalDs.add(new DenseInstance(new double[]{i}));
        }

        MarkovClusteringBuilder builder =
                new MarkovClusteringBuilder(AutomaticClusteringHelper.adjacencyMatrix2transitionMatrix(adjMatrix));
        Double maxResidual = params.get(MarkovClusteringBuilder.MarkovParameters.MARKOV_MAX_RESIDUAL);
        Double gammaExp = params.get(MarkovClusteringBuilder.MarkovParameters.MARKOV_GAMMA_EXP);
        Double loopGain = params.get(MarkovClusteringBuilder.MarkovParameters.MARKOV_LOOP_GAIN);
        Double zeroMax = params.get(MarkovClusteringBuilder.MarkovParameters.MARKOV_ZERO_MAX);
        if (maxResidual != null) builder.setMaxResidual(maxResidual);
        if (gammaExp != null) builder.setGammaExp(gammaExp);
        if (loopGain != null) builder.setLoopGain(loopGain);
        if (zeroMax != null) builder.setZeroMax(zeroMax);

        SparseMatrix matrix = builder.build();
        Dataset[] clusteredDs = getClustering(originalDs, matrix);

        // interpret clustering for Monti: map every node index to the index of its cluster;
        // -1 marks nodes that no cluster claimed. If a node shows up in several clusters,
        // the last one wins.
        int[] labels = new int[originalDs.size()];
        Arrays.fill(labels, -1);
        for (int cluster = 0; cluster < clusteredDs.length; cluster++) {
            for (int instance = 0; instance < clusteredDs[cluster].size(); instance++) {
                int dataPoint = clusteredDs[cluster].instance(instance).get(0).intValue();
                labels[dataPoint] = cluster;
            }
        }

        // A node attached to no attractor becomes its own cluster instead of being silently
        // merged into cluster 0.
        int clusterCount = clusteredDs.length;
        for (int node = 0; node < labels.length; node++) {
            if (labels[node] < 0) {
                labels[node] = clusterCount++;
            }
        }

        for (int i = 0; i < clusterCount; i++) {
            res.add(new HashSet<>());
        }
        for (ExpandedComponentInstanceSymbol sc : subcompsOrderedByName) {
            int curClusterLabel = labels[labelsForSubcomps.get(sc.getFullName())];
            res.get(curClusterLabel).add(sc);
        }
        return res;
    }
}
package de.monticore.lang.monticar.generator.middleware.clustering.algorithms;
import de.monticore.lang.monticar.generator.middleware.clustering.AutomaticClusteringHelper;
import net.sf.javaml.clustering.mcl.MarkovClustering;
import net.sf.javaml.clustering.mcl.SparseMatrix;
import smile.clustering.SpectralClustering;
/**
 * Fluent builder that configures and runs Java-ML's {@code MarkovClustering}.
 *
 * <p>All tuning values have defaults; every setter returns the builder itself so that calls can
 * be chained. {@link #build()} performs the actual clustering run.
 */
public class MarkovClusteringBuilder {

    /** Input matrix for the run; expected to be a transition matrix. */
    private double[][] data;

    // Tuning values handed unchanged to MarkovClustering.run(...), with their defaults.
    private double maxResidual = 0.001;
    private double gammaExp = 2.0;
    private double loopGain = 0.;
    private double zeroMax = 0.001;

    /**
     * Keys for the tunable parameters. The constructor flag states whether a parameter is
     * mandatory; all of them are currently optional.
     */
    public enum MarkovParameters {
        MARKOV_MAX_RESIDUAL(false),
        MARKOV_GAMMA_EXP(false),
        MARKOV_LOOP_GAIN(false),
        MARKOV_ZERO_MAX(false);

        private final Boolean mandatory;

        MarkovParameters(Boolean isMandatory) {
            this.mandatory = isMandatory;
        }

        /** @return whether this parameter has to be supplied by the caller */
        public Boolean isMandatory() {
            return mandatory;
        }
    }

    /**
     * @param data matrix to cluster; expected to be a transition matrix
     */
    public MarkovClusteringBuilder(double[][] data) {
        this.data = data;
    }

    /** Replaces the input matrix. */
    public MarkovClusteringBuilder setData(double[][] data) {
        this.data = data;
        return this;
    }

    /** Overrides the default maximum residual (0.001). */
    public MarkovClusteringBuilder setMaxResidual(double maxResidual) {
        this.maxResidual = maxResidual;
        return this;
    }

    /** Overrides the default gamma exponent (2.0). */
    public MarkovClusteringBuilder setGammaExp(double gammaExp) {
        this.gammaExp = gammaExp;
        return this;
    }

    /** Overrides the default loop gain (0). */
    public MarkovClusteringBuilder setLoopGain(double loopGain) {
        this.loopGain = loopGain;
        return this;
    }

    /** Overrides the default zero-max value (0.001). */
    public MarkovClusteringBuilder setZeroMax(double zeroMax) {
        this.zeroMax = zeroMax;
        return this;
    }

    /**
     * Runs the Markov clustering on the configured matrix.
     *
     * @return the matrix returned by {@code MarkovClustering.run}
     */
    public SparseMatrix build() {
        MarkovClustering clusterer = new MarkovClustering();
        return clusterer.run(new SparseMatrix(data), maxResidual, gammaExp, loopGain, zeroMax);
    }
}
......@@ -4,7 +4,7 @@ import smile.clustering.SpectralClustering;
public class SpectralClusteringBuilder {
private double[][] data;
private double[][] data; // expected: adjacency matrix
private Integer k;
private Integer l;
private Double sigma;
......
......@@ -6,6 +6,8 @@ import de.monticore.lang.monticar.generator.middleware.clustering.AutomaticClust
import de.monticore.lang.monticar.generator.middleware.clustering.ClusteringAlgorithm;
import de.monticore.lang.monticar.generator.middleware.clustering.ClusteringAlgorithmFactory;
import de.monticore.lang.monticar.generator.middleware.clustering.ClusteringKind;
import de.monticore.lang.monticar.generator.middleware.clustering.algorithms.MarkovClusteringAlgorithm;
import de.monticore.lang.monticar.generator.middleware.clustering.algorithms.SpectralClusteringAlgorithm;
import de.monticore.lang.monticar.generator.middleware.clustering.algorithms.SpectralClusteringBuilder;
import de.monticore.lang.monticar.generator.middleware.helpers.ComponentHelper;
import de.monticore.lang.monticar.generator.middleware.impls.CPPGenImpl;
......@@ -257,12 +259,19 @@ public class AutomaticClusteringTest extends AbstractSymtabTest{
/**
 * Runs the cluster-creation test once for every ClusteringKind, handing each algorithm
 * the parameters it needs.
 */
@Test
public void testClusteringAlgorithms(){
Object[] params;
for(ClusteringKind kind : ClusteringKind.values()){
// NOTE(review): this single-argument call looks like the removed side of the diff hunk
// (the two-argument call below being its replacement) - confirm against the real file.
testCreateClusters(ClusteringAlgorithmFactory.getFromKind(kind));
// null means "call cluster() without any optional arguments"
params= null;
switch (kind) {
case SPECTRAL_CLUSTERER:
// the spectral clusterer is asked for 2 clusters
params= new Object[] { SpectralClusteringBuilder.SpectralParameters.SPECTRAL_NUM_CLUSTERS, 2 };
break;
}
testCreateClusters(ClusteringAlgorithmFactory.getFromKind(kind), params);
}
}
private void testCreateClusters(ClusteringAlgorithm algorithm){
private void testCreateClusters(ClusteringAlgorithm algorithm, Object[] params){
//UnambiguousCluster
TaggingResolver taggingResolver = AbstractSymtabTest.createSymTabAndTaggingResolver(TEST_PATH);
......@@ -271,36 +280,67 @@ public class AutomaticClusteringTest extends AbstractSymtabTest{
System.out.println(algorithm);
List<Set<ExpandedComponentInstanceSymbol>> clusters = algorithm.cluster(componentInstanceSymbol, SpectralClusteringBuilder.SpectralParameters.SPECTRAL_NUM_CLUSTERS, 2);
List<Set<ExpandedComponentInstanceSymbol>> clusters = null;
if (params != null) clusters = algorithm.cluster(componentInstanceSymbol, params); else
clusters = algorithm.cluster(componentInstanceSymbol);
assertTrue(clusters.size() == 2);
Set<ExpandedComponentInstanceSymbol> cluster1 = clusters.get(0);
Set<ExpandedComponentInstanceSymbol> cluster2 = clusters.get(1);
assertTrue(cluster1.size() == 2);
assertTrue(cluster2.size() == 2);
if (algorithm instanceof SpectralClusteringAlgorithm) {
List<String> cluster1Names = cluster1.stream()
.map(CommonSymbol::getFullName)
.collect(Collectors.toList());
assertTrue(clusters.size() == 2);
List<String> cluster2Names = cluster2.stream()
.map(CommonSymbol::getFullName)
.collect(Collectors.toList());
Set<ExpandedComponentInstanceSymbol> cluster1 = clusters.get(0);
Set<ExpandedComponentInstanceSymbol> cluster2 = clusters.get(1);
assertTrue(cluster1.size() == 2);
assertTrue(cluster2.size() == 2);
if(cluster1Names.get(0).endsWith("compA") || cluster1Names.get(0).endsWith("compB")){
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compA"));
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compB"));
List<String> cluster1Names = cluster1.stream()
.map(CommonSymbol::getFullName)
.collect(Collectors.toList());
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compC"));
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compD"));
}else{
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compC"));
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compD"));
List<String> cluster2Names = cluster2.stream()
.map(CommonSymbol::getFullName)
.collect(Collectors.toList());
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compA"));
if (cluster1Names.get(0).endsWith("compA") || cluster1Names.get(0).endsWith("compB")) {
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compA"));
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compB"));
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compC"));
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compD"));
} else {
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compC"));
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compD"));
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compA"));
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compB"));
}
}
if (algorithm instanceof MarkovClusteringAlgorithm) {
assertTrue(clusters.size() == 4);
Set<ExpandedComponentInstanceSymbol> cluster1 = clusters.get(0);
Set<ExpandedComponentInstanceSymbol> cluster2 = clusters.get(1);
Set<ExpandedComponentInstanceSymbol> cluster3 = clusters.get(2);
Set<ExpandedComponentInstanceSymbol> cluster4 = clusters.get(3);
assertTrue(cluster1.size() == 1);
assertTrue(cluster2.size() == 1);
assertTrue(cluster3.size() == 1);
assertTrue(cluster4.size() == 1);
List<String> cluster1Names = cluster1.stream().map(CommonSymbol::getFullName).collect(Collectors.toList());
List<String> cluster2Names = cluster2.stream().map(CommonSymbol::getFullName).collect(Collectors.toList());
List<String> cluster3Names = cluster3.stream().map(CommonSymbol::getFullName).collect(Collectors.toList());
List<String> cluster4Names = cluster4.stream().map(CommonSymbol::getFullName).collect(Collectors.toList());
assertTrue(cluster1Names.contains("clustering.unambiguousCluster.compA"));
assertTrue(cluster2Names.contains("clustering.unambiguousCluster.compB"));
assertTrue(cluster3Names.contains("clustering.unambiguousCluster.compC"));
assertTrue(cluster4Names.contains("clustering.unambiguousCluster.compD"));
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment