Commit 03696c7f authored by aa, committed by Julian Johannes Steinsberger-Dührßen

Changed the checksum for training/testing data from an MD5 hash to a simple 'last-modified' value

parent a20fb0bf
......@@ -63,6 +63,8 @@ import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.io.File;
public class EMADLGenerator {
private GeneratorEMAMOpt2CPP emamGen;
......@@ -200,6 +202,16 @@ public class EMADLGenerator {
}
}
/**
 * Returns a cheap change indicator for a (potentially large) data file.
 *
 * Only used for training data, since they can be large: instead of hashing
 * the file contents, the file's last-modified timestamp is used as the
 * "checksum".
 *
 * @param filePath path of the file to inspect
 * @return the value of {@link File#lastModified()} rendered as a decimal
 *         string ("0" when the file does not exist or cannot be read, as
 *         specified by {@code File.lastModified()}), or the sentinel
 *         {@code "Exception_calculating_hash_large_file"} if the lookup
 *         throws (e.g. {@code filePath} is {@code null})
 * @throws IOException declared for signature compatibility with callers;
 *         this implementation catches every exception itself
 */
public String getChecksumForLargeFile(String filePath) throws IOException {
    try {
        // lastModified() yields a long; it must be converted explicitly
        // because the method's contract is to return a String.
        return String.valueOf(new File(filePath).lastModified());
    } catch (Exception e) {
        // Best effort: report the problem and fall back to a fixed marker
        // so that the caller can still build its combined hash string.
        e.printStackTrace();
        return "Exception_calculating_hash_large_file";
    }
}
public void generateFiles(TaggingResolver taggingResolver, EMAComponentInstanceSymbol EMAComponentSymbol, Scope symtab, String pythonPath, String forced) throws IOException {
Set<EMAComponentInstanceSymbol> allInstances = new HashSet<>();
List<FileContent> fileContents = generateStrings(taggingResolver, EMAComponentSymbol, symtab, allInstances, forced);
......@@ -239,16 +251,17 @@ public class EMADLGenerator {
String b = backend.getBackendString(backend);
String trainingDataHash = "";
String testDataHash = "";
if(b.equals("CAFFE2")){
trainingDataHash = getChecksumForFile(architecture.get().getDataPath() + "/train_lmdb/data.mdb");
testDataHash = getChecksumForFile(architecture.get().getDataPath() + "/test_lmdb/data.mdb");
}else{
if (architecture.get().getDataPath() != null) {
if (b.equals("CAFFE2")) { //TODO: TensorFlow (really? we can use .h5 files for tensorflow, a seperate library is used anyway, see the example code)
trainingDataHash = getChecksumForLargerFile(architecture.get().getDataPath() + "/train_lmdb/data.mdb");
testDataHash = getChecksumForLargerFile(architecture.get().getDataPath() + "/test_lmdb/data.mdb");
}else{
//What should we do here? For big files infeasible. Maybe hash of metainformation (last changed etc.)?
//trainingDataHash = getChecksumForFile(architecture.get().getDataPath() + "/train.h5");
//testDataHash = getChecksumForFile(architecture.get().getDataPath() + "/test.h5");
}
trainingDataHash = getChecksumForLargeFile(architecture.get().getDataPath() + "/train.h5");
testDataHash = getChecksumForLargerFile(architecture.get().getDataPath() + "/test.h5");
}
}
String trainingHash = emadlHash + "#" + cnntHash; //+ "#" + trainingDataHash + "#" + testDataHash;
boolean alreadyTrained = newHashes.contains(trainingHash) || isAlreadyTrained(trainingHash, componentInstance);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment