Skip to content
Snippets Groups Projects
Commit e8f06788 authored by Petar Hristov's avatar Petar Hristov :speech_balloon:
Browse files

Merge branch 'dev' into 'main'

Release: Sprint/2023 04 :robot:

See merge request !5
parents 406d8686 f7c5157f
No related branches found
No related tags found
1 merge request!5Release: Sprint/2023 04 :robot:
Pipeline #924609 passed
......@@ -6,12 +6,12 @@ using Coscine.ResourceTypes.Base;
using Coscine.ResourceTypes.Base.Models;
using Org.OpenAPITools.Api;
using Org.OpenAPITools.Model;
using VDS.RDF.Query;
using VDS.RDF;
using MetadataExtractorCron.Util;
using VDS.RDF.Parsing;
using System.Globalization;
using System.Security.Cryptography;
using Coscine.Metadata.Util;
namespace MetadataExtractorCron.Extractors;
......@@ -25,14 +25,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor
private readonly RdfStoreConnector _rdfStoreConnector;
private readonly MetadataGraphsCreator _metadataGraphsCreator;
private const string metadataExtractionVersionUrl = "https://purl.org/coscine/terms/metatadataextraction#version";
private const string dcatdistributionUrl = "http://www.w3.org/ns/dcat#distribution";
private const string partOfUri = "http://purl.org/dc/terms/isPartOf";
private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified";
private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource";
private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged";
public CoscineMetadataExtractor()
{
_configuration = new ConsulConfiguration();
......@@ -72,7 +64,7 @@ public class CoscineMetadataExtractor : IMetadataExtractor
foreach (var file in fileInfos.Where((fileInfo) => fileInfo.HasBody))
{
if (file.BodyBytes > 16 * 1000 * 1000)
if (file.BodyBytes > VersionUtil.DetectionByteLimit)
{
Console.WriteLine($"Skipping {file.Key} on {resourceId} since it has a too large byte size");
continue;
......@@ -114,63 +106,31 @@ public class CoscineMetadataExtractor : IMetadataExtractor
private void CreateMetadataSetsIfDontExist(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos)
{
var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}";
var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
if (!newFileGraphName.EndsWith("/"))
{
newFileGraphName += "/";
}
var existingGraphs = ListGraphs(newFileGraphName);
var existingGraphs = _rdfStoreConnector.GetMetadataIds(resourceId, entry.Key);
if (!existingGraphs.Any())
{
Console.WriteLine($"Creating graphs for {newFileGraphName} since they did not exist before!");
_metadataGraphsCreator.CreateGraphs(resourceId, entry, fileInfos);
}
}
private IEnumerable<Uri> ListGraphs(string id)
{
var cmdString = new SparqlParameterizedString
{
CommandText = @"SELECT DISTINCT ?g
WHERE { GRAPH ?g { ?s ?p ?o }
FILTER(contains(str(?g), @graph)) }"
};
cmdString.SetLiteral("graph", id);
var resultSet = _rdfStoreConnector.QueryEndpoint.QueryWithResultSet(cmdString.ToString());
var graphs = new List<Uri>();
foreach (SparqlResult r in resultSet)
{
var uriNode = r.Value("g") as UriNode;
if (uriNode is not null)
{
graphs.Add(uriNode.Uri);
}
Console.WriteLine($"Creating graphs for {resourceId}, {entry.Key} since they did not exist before!");
GraphStorer.StoreGraphs(_metadataGraphsCreator.CreateGraphs(
resourceId,
entry.Key,
true,
true
), _rdfStoreConnector);
}
return graphs;
}
private bool HasCurrentMetadataExtracted(string resourceId, ResourceEntry entry)
{
var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}";
var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
if (!newFileGraphName.EndsWith("/"))
{
newFileGraphName += "/";
}
var existingGraphs = ListGraphs(newFileGraphName);
var existingGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key);
var existingExtractedGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key, true);
var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);
var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingGraphs);
var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingExtractedGraphs);
return
recentDataExtractedVersion != null
&& recentDataVersion != null
&& recentDataExtractedVersion.AbsoluteUri.Contains(recentDataVersion.AbsoluteUri)
&& recentDataExtractedVersion.AbsoluteUri != recentDataVersion.AbsoluteUri;
&& recentDataExtractedVersion.Contains(recentDataVersion)
&& recentDataExtractedVersion != recentDataVersion;
}
private async Task<MetadataOutput> ExtractMetadata(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions)
......@@ -195,9 +155,9 @@ public class CoscineMetadataExtractor : IMetadataExtractor
var extractedOutputs = await _apiClient.PostMetadataExtractorWorkerAsync(
givenStream,
$"{resourceId}/{entry.Key.Replace("\\", "/")}",
null,
entry.Created?.ToString("o", CultureInfo.InvariantCulture),
entry.Modified?.ToString("o", CultureInfo.InvariantCulture)
null!,
entry.Created?.ToString("o", CultureInfo.InvariantCulture)!,
entry.Modified?.ToString("o", CultureInfo.InvariantCulture)!
);
return extractedOutputs[0];
......@@ -221,9 +181,8 @@ public class CoscineMetadataExtractor : IMetadataExtractor
newFileGraphNameAddon += "/";
}
var existingGraphs = ListGraphs(newFileGraphNameAddon);
var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);
var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs);
var recentDataVersion = _rdfStoreConnector.GetDataId(resourceId, entry.Key);
var recentMetadataVersion = _rdfStoreConnector.GetMetadataId(resourceId, entry.Key);
await CreateHashData(resourceId, entry, resourceTypeDefinition, resourceTypeOptions, newFileGraphNameAddon, recentDataVersion);
......@@ -232,14 +191,14 @@ public class CoscineMetadataExtractor : IMetadataExtractor
throw new NullReferenceException("The recent data version is null and can't be used.");
}
var recentDataExtractedVersion = new Uri(recentDataVersion.AbsoluteUri + "&extracted=true");
var recentDataExtractedVersion = new Uri(recentDataVersion + "&extracted=true");
if (recentMetadataVersion is null)
{
throw new NullReferenceException("The recent metadata version is null and can't be used.");
}
var recentMetadataExtractedVersion = new Uri(recentMetadataVersion.AbsoluteUri + "&extracted=true");
var recentMetadataExtractedVersion = new Uri(recentMetadataVersion + "&extracted=true");
var tripleStore = new TripleStore();
tripleStore.LoadFromString(extractedMetadata.Metadata, new TriGParser(TriGSyntax.Recommendation));
......@@ -248,57 +207,22 @@ public class CoscineMetadataExtractor : IMetadataExtractor
GraphStorer.StoreGraphs(tripleStore.Graphs, _rdfStoreConnector);
var trellisGraph = _rdfStoreConnector.GetGraph(trellisGraphUri);
var triples = new List<Triple>();
AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentDataExtractedVersion.AbsoluteUri, triples);
AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentMetadataExtractedVersion.AbsoluteUri, triples);
GraphStorer.AddToGraph(trellisGraph, triples, _rdfStoreConnector);
var newDataFileGraphName = $"{newFileGraphName}/@type=data";
var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata";
var dataGraph = CreateOrGetGraph(newDataFileGraphName);
var metadataGraph = CreateOrGetGraph(newMetadataFileGraphName);
dataGraph.Assert(new Triple(
dataGraph.CreateUriNode(new Uri(newDataFileGraphName)),
dataGraph.CreateUriNode(new Uri(dcatdistributionUrl)),
dataGraph.CreateUriNode(recentDataExtractedVersion)
));
dataGraph.Assert(new Triple(
dataGraph.CreateUriNode(recentDataExtractedVersion),
dataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)),
dataGraph.CreateLiteralNode(metadataExtractorVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(new Uri(newMetadataFileGraphName)),
metadataGraph.CreateUriNode(new Uri(dcatdistributionUrl)),
metadataGraph.CreateUriNode(recentMetadataExtractedVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(recentMetadataExtractedVersion),
metadataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)),
metadataGraph.CreateLiteralNode(metadataExtractorVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(recentMetadataVersion),
metadataGraph.CreateUriNode(new Uri("http://purl.org/fdp/fdp-o#isMetadataOf")),
metadataGraph.CreateUriNode(recentDataVersion)
));
var provenanceGraphs = new List<IGraph> { dataGraph, metadataGraph };
GraphStorer.StoreGraphs(provenanceGraphs, _rdfStoreConnector);
}
private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, Uri? recentDataVersion)
GraphStorer.StoreGraphs(
_metadataGraphsCreator.UpdateExtractionGraphs(
resourceId,
entry.Key,
recentDataVersion,
recentMetadataVersion,
metadataExtractorVersion
),
_rdfStoreConnector);
}
private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, string? recentDataVersion)
{
var dataGraphName = $"{newFileGraphNameAddon}@type=data";
var dataGraph = CreateOrGetGraph(dataGraphName);
var hashTriples = new List<Triple>();
var loadedEntry = await resourceTypeDefinition.LoadEntry(resourceId, entry.Key, resourceTypeOptions);
if (loadedEntry is null)
......@@ -306,25 +230,16 @@ public class CoscineMetadataExtractor : IMetadataExtractor
throw new NullReferenceException("The resulting stream of the loaded entry is null, when trying to hash the data.");
}
var sha512Hash = Convert.ToBase64String(HashUtil.HashData(loadedEntry, HashAlgorithmName.SHA512));
var dataGraphId = recentDataVersion;
var hashGraphId = new Uri($"{dataGraphId?.AbsoluteUri}&hash={Guid.NewGuid()}");
var dataGraphSubject = dataGraph.CreateUriNode(dataGraphId);
var hashSubject = dataGraph.CreateUriNode(hashGraphId);
var defaultHash = Convert.ToBase64String(HashUtil.HashData(loadedEntry));
hashTriples.Add(new Triple(dataGraphSubject,
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashType")),
hashSubject));
hashTriples.Add(new Triple(hashSubject,
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashFunction")),
dataGraph.CreateLiteralNode("SHA512")));
hashTriples.Add(new Triple(hashSubject,
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashValue")),
dataGraph.CreateLiteralNode(sha512Hash, new Uri("http://www.w3.org/2001/XMLSchema#hexBinary"))));
if (recentDataVersion is null)
{
return;
}
GraphStorer.AddToGraph(dataGraph, hashTriples, _rdfStoreConnector);
GraphStorer.AddToGraph(dataGraph, HashUtil.CreateHashTriples(
dataGraph, new Uri(recentDataVersion), defaultHash
), _rdfStoreConnector);
}
private static void FormatResultMetadata(TripleStore tripleStore, Uri dataExtractGraph, Uri metadataExtractGraph)
......@@ -347,30 +262,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor
}
}
private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples)
{
var setGraphNode = trellisGraph.CreateUriNode(new Uri(graphUri));
var setThePartNode = trellisGraph.CreateUriNode(new Uri(thePartUri));
var triple = new Triple(
setGraphNode,
trellisGraph.CreateUriNode(new Uri(partOfUri)),
setThePartNode
);
if (!trellisGraph.ContainsTriple(triple))
{
triples.Add(triple);
trellisGraph.Assert(triple);
var assignmentTriple = new Triple(
setGraphNode,
trellisGraph.CreateUriNode(new Uri(aUri)),
trellisGraph.CreateUriNode(new Uri(ldpAssignment))
);
triples.Add(assignmentTriple);
trellisGraph.Assert(assignmentTriple);
AddModifiedDate(trellisGraph, graphUri, triples);
}
}
private IGraph CreateOrGetGraph(string graphUrl)
{
var entryAlreadyExists = _rdfStoreConnector.HasGraph(graphUrl);
......@@ -382,22 +273,4 @@ public class CoscineMetadataExtractor : IMetadataExtractor
};
}
private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples)
{
var dcTermsModifiedNode = graph.CreateUriNode(new Uri(dctermsModifiedUri));
var rootNode = graph.CreateUriNode(new Uri(root));
if (!graph.GetTriplesWithSubjectPredicate(rootNode, dcTermsModifiedNode).Any())
{
var triple = new Triple(
rootNode,
dcTermsModifiedNode,
graph.CreateLiteralNode(
DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
)
);
triples.Add(triple);
graph.Assert(triple);
}
}
}
\ No newline at end of file
......@@ -9,32 +9,14 @@ public static class GraphStorer
{
foreach (var graphUri in graphUris)
{
Console.WriteLine($" ({graphUri.BaseUri})");
if (rdfStoreConnector.HasGraph(graphUri.BaseUri))
{
Console.WriteLine($" - Graph {graphUri.BaseUri} exists");
// Clear the existing graph from the store
rdfStoreConnector.ClearGraph(graphUri.BaseUri);
Console.WriteLine($" - Cleared Graph {graphUri.BaseUri}");
}
// Chunking since the size otherwise can be too large
foreach (var triples in graphUri.Triples.Chunk(100))
{
rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graphUri.BaseUri, triples, Enumerable.Empty<Triple>());
}
Console.WriteLine($" - Graph {graphUri.BaseUri} added successfully");
Console.WriteLine();
rdfStoreConnector.AddGraph(graphUri);
}
}
public static void AddToGraph(IGraph graph, IEnumerable<Triple> triples, RdfStoreConnector rdfStoreConnector)
{
Console.WriteLine($" - Adding Triples to {graph.BaseUri}");
rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graph.BaseUri, triples, Enumerable.Empty<Triple>());
rdfStoreConnector.AddToGraph(graph, triples);
Console.WriteLine($" - Triples added to Graph {graph.BaseUri} successfully");
Console.WriteLine();
}
......
using System.Security.Cryptography;
namespace MetadataExtractorCron.Util;
/// <summary>
/// Helpers for hashing the content of a stream with a selectable hash algorithm.
/// </summary>
public static class HashUtil
{
    /// <summary>
    /// Creates the <see cref="HashAlgorithm"/> instance that corresponds to the given algorithm name.
    /// </summary>
    /// <param name="hashAlgorithmName">One of MD5, SHA1, SHA256, SHA384 or SHA512.</param>
    /// <returns>A new hash algorithm instance; the caller is responsible for disposing it.</returns>
    /// <exception cref="CryptographicException">The name is not one of the supported algorithms.</exception>
    private static HashAlgorithm GetHashAlgorithm(HashAlgorithmName hashAlgorithmName)
    {
        // HashAlgorithmName is a struct, so an if-chain over the well-known names is used
        // instead of a constant-pattern switch.
        if (hashAlgorithmName == HashAlgorithmName.MD5)
        {
            return MD5.Create();
        }
        if (hashAlgorithmName == HashAlgorithmName.SHA1)
        {
            return SHA1.Create();
        }
        if (hashAlgorithmName == HashAlgorithmName.SHA256)
        {
            return SHA256.Create();
        }
        if (hashAlgorithmName == HashAlgorithmName.SHA384)
        {
            return SHA384.Create();
        }
        if (hashAlgorithmName == HashAlgorithmName.SHA512)
        {
            return SHA512.Create();
        }

        throw new CryptographicException($"Unknown hash algorithm \"{hashAlgorithmName.Name}\".");
    }

    /// <summary>
    /// Computes the hash of the given stream with the requested algorithm.
    /// </summary>
    /// <param name="data">The stream to hash. It is read by <see cref="HashAlgorithm.ComputeHash(Stream)"/> and is not disposed here.</param>
    /// <param name="hashAlgorithm">The name of the algorithm to use.</param>
    /// <returns>The raw hash bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
    /// <exception cref="CryptographicException"><paramref name="hashAlgorithm"/> is not supported.</exception>
    public static byte[] HashData(Stream data,
        HashAlgorithmName hashAlgorithm)
    {
        // Fail with a descriptive argument error instead of a NullReferenceException
        // from inside ComputeHash.
        ArgumentNullException.ThrowIfNull(data);

        using var hashAlgorithmObject = GetHashAlgorithm(hashAlgorithm);
        return hashAlgorithmObject.ComputeHash(data);
    }
}
\ No newline at end of file
using Coscine.Metadata;
using Coscine.ResourceTypes.Base.Models;
using System.Globalization;
using VDS.RDF;
using VDS.RDF.Parsing;
using VDS.RDF.Query;
namespace MetadataExtractorCron.Util;
/// <summary>
/// Builds the linked set of RDF graphs for a single resource entry (the file graph, its
/// metadata and data graphs and their versioned graphs), registers them in the Trellis
/// server-managed graph and writes everything to the RDF store.
/// Derived from MetadataMigrator.
/// </summary>
public class MetadataGraphsCreator
{
    private const string partOfUri = "http://purl.org/dc/terms/isPartOf";
    private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
    private const string basicContainerUri = "http://www.w3.org/ns/ldp#BasicContainer";
    private const string nonRdfSourceUri = "http://www.w3.org/ns/ldp#NonRDFSource";
    private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource";
    private const string dcatcatalogUri = "http://www.w3.org/ns/dcat#catalog";
    private const string dcatCatalogClassUri = "http://www.w3.org/ns/dcat#Catalog";
    private const string dcatDatasetUri = "http://www.w3.org/ns/dcat#dataset";
    private const string dctermsIdentifierUri = "http://purl.org/dc/terms/identifier";
    private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified";
    private const string fdpMetadataServiceUri = "http://purl.org/fdp/fdp-o#MetadataService";
    private const string fdphasMetadataUri = "http://purl.org/fdp/fdp-o#hasMetadata";
    private const string provEntityUri = "http://www.w3.org/ns/prov#Entity";
    private const string provGeneratedAtTimeUri = "http://www.w3.org/ns/prov#generatedAtTime";
    // The PROV-O property is prov:wasRevisionOf. This constant previously carried a stray
    // "Node" suffix ("prov#wasRevisionOfNode"), which is not a term of the PROV namespace.
    // NOTE(review): triples already stored with the old IRI are not migrated here.
    private const string provWasRevisionOfUri = "http://www.w3.org/ns/prov#wasRevisionOf";
    private const string ldpDescribedByUri = "http://www.w3.org/ns/ldp#describedBy";
    private const string resourceUrlPrefix = "https://purl.org/coscine/resources";
    private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged";

    private RdfStoreConnector RdfStoreConnector { get; }

    /// <summary>
    /// Creates a graph creator that reads from and writes to the given store connector.
    /// </summary>
    /// <param name="rdfStoreConnector">Connector used for all graph lookups, queries and writes.</param>
    public MetadataGraphsCreator(RdfStoreConnector rdfStoreConnector)
    {
        RdfStoreConnector = rdfStoreConnector;
    }

    /// <summary>
    /// Creates (or extends) the file, metadata, data and version graphs for <paramref name="entry"/>
    /// and stores them, together with the new Trellis triples, through <see cref="GraphStorer"/>.
    /// </summary>
    /// <param name="resourceId">Identifier of the resource; appended to the resource URL prefix.</param>
    /// <param name="entry">The entry whose graphs are created; its key becomes part of the graph names.</param>
    /// <param name="fileInfos">All entries of the resource; used to link entries located below <paramref name="entry"/>.</param>
    public void CreateGraphs(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos)
    {
        var trellisGraph = RdfStoreConnector.GetGraph(trellisGraphUri);
        var graphs = new List<IGraph>();
        var triples = new List<Triple>();

        var resourceGraphName = $"{resourceUrlPrefix}/{resourceId}";

        // Graph URIs of every known entry, each normalized to end with a slash.
        // Evaluated lazily when AddFilesToAFolder enumerates it.
        var fileGraphs = fileInfos.Select(fileInfo =>
        {
            var entryGraphName = $"{resourceGraphName}/{fileInfo.Key}";
            if (!entryGraphName.EndsWith('/'))
            {
                entryGraphName += "/";
            }
            return new Uri(entryGraphName);
        });

        var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
        Console.WriteLine($"Migrating {newFileGraphName}");

        var version = VersionUtil.GetNewVersion();
        var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata";
        var newDataFileGraphName = $"{newFileGraphName}/@type=data";
        var newMetadataVersionFileGraphName = $"{newFileGraphName}/@type=metadata&version={version}";
        var newDataVersionFileGraphName = $"{newFileGraphName}/@type=data&version={version}";

        var newFileGraph = CreateOrGetGraph(newFileGraphName);
        var fileNode = newFileGraph.CreateUriNode(new Uri(newFileGraphName));
        graphs.Add(newFileGraph);

        // Set relation to resource, if a plain file in no folder
        if (!entry.Key.Contains('/'))
        {
            AddToTrellis(trellisGraph, basicContainerUri, resourceGraphName, newFileGraphName, triples);
        }

        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri))));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(fdpMetadataServiceUri))));

        AddFilesToAFolder(trellisGraph, fileGraphs, new Uri(newFileGraphName), newFileGraph, newFileGraphName, triples);

        var metadataFileGraph = SetMetadataGraph(trellisGraph, graphs, newMetadataFileGraphName, newFileGraph, newFileGraphName, triples);
        var dataFileGraph = SetDataGraph(trellisGraph, graphs, newDataFileGraphName, newFileGraph, newFileGraphName, metadataFileGraph.BaseUri.AbsoluteUri, triples);

        // Graphs that already exist below this entry decide whether a new version graph is
        // started or the most recent one is reused.
        var existingGraphs = ListGraphs(newFileGraphName + "/");
        SetDataVersionGraph(graphs, newDataVersionFileGraphName, dataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples);
        SetMetadataVersionGraph(graphs, newMetadataVersionFileGraphName, metadataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples);

        GraphStorer.StoreGraphs(graphs, RdfStoreConnector);
        GraphStorer.AddToGraph(trellisGraph, triples, RdfStoreConnector);
    }

    /// <summary>
    /// Links every entry graph located below <paramref name="fileGraph"/> to the file graph via
    /// dcat:catalog and registers it as part of <paramref name="fileUri"/> in the Trellis graph.
    /// </summary>
    private static void AddFilesToAFolder(IGraph trellisGraph, IEnumerable<Uri> fileGraphs, Uri fileGraph, IGraph newFileGraph, string fileUri, ICollection<Triple> triples)
    {
        var folderPrefix = fileGraph.AbsoluteUri + "/";

        // Add all files to a folder
        foreach (var otherFileGraph in fileGraphs)
        {
            var otherUri = otherFileGraph.AbsoluteUri;

            // TODO: Deal with multiple levels of files
            // Parameterized graphs (type/data markers) are skipped.
            if (otherUri != fileGraph.AbsoluteUri
                && otherUri.Contains(folderPrefix)
                && !otherUri.Contains("&data")
                && !otherUri.Contains("?type=")
                && !otherUri.Contains("&type=")
                && !otherUri.Contains("@type="))
            {
                var otherFileNode = newFileGraph.CreateUriNode(otherFileGraph);
                newFileGraph.Assert(new Triple(newFileGraph.CreateUriNode(new Uri(fileUri)), newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), otherFileNode));
                AddToTrellis(trellisGraph, basicContainerUri, fileUri, otherUri, triples);
            }
        }
    }

    /// <summary>
    /// Creates or loads the metadata graph of a file, links it from the file graph
    /// (dcat:catalog and fdp-o:hasMetadata), types it as dcat:Catalog and registers it in Trellis
    /// as an ldp:RDFSource.
    /// </summary>
    /// <returns>The metadata graph, which has also been appended to <paramref name="graphs"/>.</returns>
    private IGraph SetMetadataGraph(IGraph trellisGraph, List<IGraph> graphs, string newMetadataFileGraphName, IGraph newFileGraph, string fileUri, ICollection<Triple> triples)
    {
        var metadataFileNode = newFileGraph.CreateUriNode(new Uri(newMetadataFileGraphName));
        var metadataFileGraph = CreateOrGetGraph(newMetadataFileGraphName);
        graphs.Add(metadataFileGraph);

        AddToTrellis(trellisGraph, rdfSourceUri, fileUri, newMetadataFileGraphName, triples);

        var fileNode = newFileGraph.CreateUriNode(new Uri(fileUri));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), metadataFileNode));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(fdphasMetadataUri)), metadataFileNode));

        metadataFileGraph.Assert(new Triple(
            Tools.CopyNode(metadataFileNode, metadataFileGraph),
            metadataFileGraph.CreateUriNode(new Uri(aUri)),
            metadataFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri))
        ));

        return metadataFileGraph;
    }

    /// <summary>
    /// Creates or loads the data graph of a file, links it from the file graph (dcat:catalog),
    /// types it as dcat:Catalog, points it at its metadata graph via ldp:describedBy and
    /// registers it in Trellis as an ldp:NonRDFSource.
    /// </summary>
    /// <returns>The data graph, which has also been appended to <paramref name="graphs"/>.</returns>
    private IGraph SetDataGraph(IGraph trellisGraph, List<IGraph> graphs, string newDataFileGraphName, IGraph newFileGraph, string fileUri, string metadataFileUri, ICollection<Triple> triples)
    {
        var dataFileNode = newFileGraph.CreateUriNode(new Uri(newDataFileGraphName));
        var dataFileGraph = CreateOrGetGraph(newDataFileGraphName);
        graphs.Add(dataFileGraph);

        AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, newDataFileGraphName, triples);

        var fileNode = newFileGraph.CreateUriNode(new Uri(fileUri));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), dataFileNode));

        dataFileGraph.Assert(new Triple(Tools.CopyNode(dataFileNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(aUri)), dataFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri))));
        dataFileGraph.Assert(new Triple(Tools.CopyNode(dataFileNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(ldpDescribedByUri)), dataFileGraph.CreateUriNode(new Uri(metadataFileUri))));

        return dataFileGraph;
    }

    /// <summary>
    /// Selects the data version graph (the most recent existing one, or a new graph named
    /// <paramref name="newDataVersionFileGraphName"/>), stamps it with its own identifier and
    /// records provenance for it in <paramref name="dataFileGraph"/>.
    /// </summary>
    private void SetDataVersionGraph(List<IGraph> graphs, string newDataVersionFileGraphName, IGraph dataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples)
    {
        var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);

        IGraph currentDataVersionGraph;
        if (recentDataVersion is null)
        {
            currentDataVersionGraph = new Graph()
            {
                BaseUri = new Uri(newDataVersionFileGraphName),
            };
        }
        else
        {
            currentDataVersionGraph = RdfStoreConnector.GetGraph(recentDataVersion);
        }

        var currentDataVersionNode = currentDataVersionGraph.CreateUriNode(currentDataVersionGraph.BaseUri);
        currentDataVersionGraph.Assert(new Triple(currentDataVersionNode, currentDataVersionGraph.CreateUriNode(new Uri(dctermsIdentifierUri)), currentDataVersionGraph.CreateLiteralNode(
            currentDataVersionGraph.BaseUri.AbsoluteUri,
            new Uri(XmlSpecsHelper.XmlSchemaDataTypeString)
        )));

        // PROV Info: only written once per version, guarded by the prov:Entity type triple.
        var provTriple = new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(aUri)), dataFileGraph.CreateUriNode(new Uri(provEntityUri)));
        if (!dataFileGraph.ContainsTriple(provTriple))
        {
            dataFileGraph.Assert(provTriple);
            dataFileGraph.Assert(new Triple(
                dataFileGraph.CreateUriNode(dataFileGraph.BaseUri),
                dataFileGraph.CreateUriNode(new Uri(dcatDatasetUri)),
                Tools.CopyNode(currentDataVersionNode, dataFileGraph)
            ));
            dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), dataFileGraph.CreateLiteralNode(
                DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
                new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
            )));
            AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, currentDataVersionGraph.BaseUri.AbsoluteUri, triples);
        }

        // Link to the previous version only when the selected graph's base URI differs from it.
        if (recentDataVersion is not null && recentDataVersion.AbsoluteUri != currentDataVersionGraph.BaseUri.AbsoluteUri)
        {
            var recentDataVersionNode = dataFileGraph.CreateUriNode(recentDataVersion);
            dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentDataVersionNode));
        }

        graphs.Add(currentDataVersionGraph);
    }

    /// <summary>
    /// Selects the metadata version graph (the most recent existing one, or a new graph named
    /// <paramref name="newMetadataVersionFileGraphName"/>) and records provenance for it in
    /// <paramref name="metadataFileGraph"/>.
    /// </summary>
    private void SetMetadataVersionGraph(List<IGraph> graphs, string newMetadataVersionFileGraphName, IGraph metadataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples)
    {
        var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs);

        IGraph currentMetadataVersionGraph;
        if (recentMetadataVersion is null)
        {
            currentMetadataVersionGraph = new Graph()
            {
                BaseUri = new Uri(newMetadataVersionFileGraphName),
            };
        }
        else
        {
            currentMetadataVersionGraph = RdfStoreConnector.GetGraph(recentMetadataVersion);
        }

        var currentMetadataVersionNode = currentMetadataVersionGraph.CreateUriNode(currentMetadataVersionGraph.BaseUri);

        // PROV Info: only written once per version, guarded by the prov:Entity type triple.
        var provTriple = new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(aUri)), metadataFileGraph.CreateUriNode(new Uri(provEntityUri)));
        if (!metadataFileGraph.ContainsTriple(provTriple))
        {
            metadataFileGraph.Assert(provTriple);
            metadataFileGraph.Assert(new Triple(
                metadataFileGraph.CreateUriNode(metadataFileGraph.BaseUri),
                metadataFileGraph.CreateUriNode(new Uri(fdphasMetadataUri)),
                Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph)
            ));
            metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), metadataFileGraph.CreateLiteralNode(
                DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
                new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
            )));
            AddToTrellis(trellisGraph, rdfSourceUri, fileUri, currentMetadataVersionGraph.BaseUri.AbsoluteUri, triples);
        }

        // Link to the previous version only when the selected graph's base URI differs from it.
        if (recentMetadataVersion is not null && recentMetadataVersion.AbsoluteUri != currentMetadataVersionGraph.BaseUri.AbsoluteUri)
        {
            var recentMetadataVersionNode = metadataFileGraph.CreateUriNode(recentMetadataVersion);
            metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentMetadataVersionNode));
        }

        graphs.Add(currentMetadataVersionGraph);
    }

    /// <summary>
    /// Registers <paramref name="graphUri"/> as part of <paramref name="thePartUri"/> in the Trellis
    /// graph, together with its LDP type and a modified date, unless the isPartOf triple already exists.
    /// Every new triple is asserted on <paramref name="trellisGraph"/> and appended to <paramref name="triples"/>.
    /// </summary>
    private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples)
    {
        var setGraphNode = trellisGraph.CreateUriNode(new Uri(graphUri));
        var setThePartNode = trellisGraph.CreateUriNode(new Uri(thePartUri));
        var triple = new Triple(
            setGraphNode,
            trellisGraph.CreateUriNode(new Uri(partOfUri)),
            setThePartNode
        );

        if (trellisGraph.ContainsTriple(triple))
        {
            return;
        }

        triples.Add(triple);
        trellisGraph.Assert(triple);

        var assignmentTriple = new Triple(
            setGraphNode,
            trellisGraph.CreateUriNode(new Uri(aUri)),
            trellisGraph.CreateUriNode(new Uri(ldpAssignment))
        );
        triples.Add(assignmentTriple);
        trellisGraph.Assert(assignmentTriple);

        AddModifiedDate(trellisGraph, graphUri, triples);
    }

    /// <summary>
    /// Returns the stored graph for <paramref name="graphUrl"/> when the store reports that it
    /// exists, otherwise a new empty graph with that base URI.
    /// </summary>
    private IGraph CreateOrGetGraph(string graphUrl)
    {
        var entryAlreadyExists = RdfStoreConnector.HasGraph(graphUrl);
        return entryAlreadyExists
            ? RdfStoreConnector.GetGraph(graphUrl)
            : new Graph()
            {
                BaseUri = new Uri(graphUrl)
            };
    }

    /// <summary>
    /// Queries the store for the names of all non-empty graphs whose URI contains <paramref name="id"/>.
    /// </summary>
    /// <param name="id">Substring to look for in the graph URI; passed to the query as a literal parameter.</param>
    /// <returns>The URIs of the matching graphs.</returns>
    public IEnumerable<Uri> ListGraphs(string id)
    {
        var cmdString = new SparqlParameterizedString
        {
            CommandText = @"SELECT DISTINCT ?g
WHERE { GRAPH ?g { ?s ?p ?o }
FILTER(contains(str(?g), @graph)) }"
        };
        cmdString.SetLiteral("graph", id);

        var resultSet = RdfStoreConnector.QueryEndpoint.QueryWithResultSet(cmdString.ToString());

        var graphs = new List<Uri>();
        foreach (SparqlResult r in resultSet)
        {
            // Bindings that are not URI nodes are ignored.
            if (r.Value("g") is UriNode uriNode)
            {
                graphs.Add(uriNode.Uri);
            }
        }
        return graphs;
    }

    /// <summary>
    /// Adds a dcterms:modified triple with the current UTC time for <paramref name="root"/>,
    /// unless the graph already holds one for that subject.
    /// </summary>
    private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples)
    {
        var dcTermsModifiedNode = graph.CreateUriNode(new Uri(dctermsModifiedUri));
        var rootNode = graph.CreateUriNode(new Uri(root));

        if (graph.GetTriplesWithSubjectPredicate(rootNode, dcTermsModifiedNode).Any())
        {
            return;
        }

        var triple = new Triple(
            rootNode,
            dcTermsModifiedNode,
            graph.CreateLiteralNode(
                DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
                new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
            )
        );
        triples.Add(triple);
        graph.Assert(triple);
    }
}
\ No newline at end of file
using System.Web;
namespace MetadataExtractorCron;
/// <summary>
/// Helpers for the numeric <c>version</c> parameter carried in graph URIs such as
/// <c>.../@type=data&amp;version=1680000000</c>.
/// </summary>
public static class VersionUtil
{
    /// <summary>
    /// Finds the graph URI with the highest <c>version</c> parameter among those matching the filters.
    /// </summary>
    /// <param name="graphUris">Candidate graph URIs.</param>
    /// <param name="filter">Required value of the <c>type</c> parameter, or <c>null</c> to accept any type.</param>
    /// <param name="notFilterExtracted">
    /// When <c>true</c>, only URIs without an <c>extracted</c> parameter are considered;
    /// when <c>false</c>, only URIs that carry one.
    /// </param>
    /// <returns>
    /// The matching URI with the highest version, or <c>null</c> when no URI has a parsable
    /// version and satisfies the filters.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="graphUris"/> is <c>null</c>.</exception>
    public static Uri? GetRecentVersion(IEnumerable<Uri> graphUris, string? filter = null, bool notFilterExtracted = true)
    {
        ArgumentNullException.ThrowIfNull(graphUris);

        // Start without a candidate: seeding with the first URI would hand back a graph that
        // does not satisfy the filters whenever nothing matches, defeating the callers' null checks.
        Uri? currentBest = null;
        var currentBestVersion = 0L;

        foreach (var graphUri in graphUris)
        {
            // The parameters follow an '@' in the graph name; turning it into '?' lets the
            // query-string parser read them.
            var queryDictionary = HttpUtility.ParseQueryString(new Uri(graphUri.ToString().Replace("@", "?")).Query);

            // TryParse returns false for a missing (null) version as well.
            if (!long.TryParse(queryDictionary["version"], out var longVersion))
            {
                continue;
            }

            var typeMatches = filter is null || queryDictionary["type"] == filter;
            var isExtracted = queryDictionary["extracted"] is not null;
            var extractedMatches = notFilterExtracted != isExtracted;

            if (typeMatches
                && extractedMatches
                && (currentBest is null || longVersion > currentBestVersion))
            {
                currentBestVersion = longVersion;
                currentBest = graphUri;
            }
        }

        return currentBest;
    }

    /// <summary>
    /// Returns the most recent <c>type=data</c> graph URI that carries an <c>extracted</c> parameter, or <c>null</c>.
    /// </summary>
    public static Uri? GetRecentDataExtractedVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "data", false);
    }

    /// <summary>
    /// Returns the most recent <c>type=data</c> graph URI without an <c>extracted</c> parameter, or <c>null</c>.
    /// </summary>
    public static Uri? GetRecentDataVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "data");
    }

    /// <summary>
    /// Returns the most recent <c>type=metadata</c> graph URI without an <c>extracted</c> parameter, or <c>null</c>.
    /// </summary>
    public static Uri? GetRecentMetadataVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "metadata");
    }

    /// <summary>
    /// Creates a new version number: the current UTC time as whole seconds since 1970-01-01.
    /// </summary>
    public static long GetNewVersion()
    {
        // UTC Timestamp; computed as a 64-bit value so it does not overflow in 2038.
        return DateTimeOffset.UtcNow.ToUnixTimeSeconds();
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment