diff --git a/src/MetadataExtractorCron/Extractors/CoscineMetadataExtractor.cs b/src/MetadataExtractorCron/Extractors/CoscineMetadataExtractor.cs index f938aef741fe63802fdd035a7c216d01630e496a..83c91bbeff2cfae614fd641040d5526aea8b240c 100644 --- a/src/MetadataExtractorCron/Extractors/CoscineMetadataExtractor.cs +++ b/src/MetadataExtractorCron/Extractors/CoscineMetadataExtractor.cs @@ -6,12 +6,12 @@ using Coscine.ResourceTypes.Base; using Coscine.ResourceTypes.Base.Models; using Org.OpenAPITools.Api; using Org.OpenAPITools.Model; -using VDS.RDF.Query; using VDS.RDF; using MetadataExtractorCron.Util; using VDS.RDF.Parsing; using System.Globalization; using System.Security.Cryptography; +using Coscine.Metadata.Util; namespace MetadataExtractorCron.Extractors; @@ -25,14 +25,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor private readonly RdfStoreConnector _rdfStoreConnector; private readonly MetadataGraphsCreator _metadataGraphsCreator; - private const string metadataExtractionVersionUrl = "https://purl.org/coscine/terms/metatadataextraction#version"; - private const string dcatdistributionUrl = "http://www.w3.org/ns/dcat#distribution"; - private const string partOfUri = "http://purl.org/dc/terms/isPartOf"; - private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; - private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified"; - private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource"; - private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged"; - public CoscineMetadataExtractor() { _configuration = new ConsulConfiguration(); @@ -72,7 +64,7 @@ public class CoscineMetadataExtractor : IMetadataExtractor foreach (var file in fileInfos.Where((fileInfo) => fileInfo.HasBody)) { - if (file.BodyBytes > 16 * 1000 * 1000) + if (file.BodyBytes > VersionUtil.DetectionByteLimit) { Console.WriteLine($"Skipping {file.Key} on {resourceId} since it has a too large byte size"); continue; @@ -114,63 +106,31 @@ public class CoscineMetadataExtractor : IMetadataExtractor private void CreateMetadataSetsIfDontExist(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos) { - var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}"; - var newFileGraphName = $"{resourceGraphName}/{entry.Key}"; - if (!newFileGraphName.EndsWith("/")) - { - newFileGraphName += "/"; - } - - var existingGraphs = ListGraphs(newFileGraphName); + var existingGraphs = _rdfStoreConnector.GetMetadataIds(resourceId, entry.Key); if (!existingGraphs.Any()) { - Console.WriteLine($"Creating graphs for {newFileGraphName} since they did not exist before!"); - _metadataGraphsCreator.CreateGraphs(resourceId, entry, fileInfos); - } - } - - private IEnumerable<Uri> ListGraphs(string id) - { - var cmdString = new SparqlParameterizedString - { - CommandText = @"SELECT DISTINCT ?g - WHERE { GRAPH ?g { ?s ?p ?o } - FILTER(contains(str(?g), @graph)) }" - }; - cmdString.SetLiteral("graph", id); - - var resultSet = _rdfStoreConnector.QueryEndpoint.QueryWithResultSet(cmdString.ToString()); - - var graphs = new List<Uri>(); - foreach (SparqlResult r in resultSet) - { - var uriNode = r.Value("g") as UriNode; - if (uriNode is not null) - { - graphs.Add(uriNode.Uri); - } + Console.WriteLine($"Creating graphs for {resourceId}, {entry.Key} since they did not exist before!"); + GraphStorer.StoreGraphs(_metadataGraphsCreator.CreateGraphs( + resourceId, + entry.Key, + true, + true + ), _rdfStoreConnector); } - return graphs; } private bool HasCurrentMetadataExtracted(string resourceId, ResourceEntry entry) { - var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}"; - var newFileGraphName = $"{resourceGraphName}/{entry.Key}"; - if (!newFileGraphName.EndsWith("/")) - { - newFileGraphName += "/"; - } - - var existingGraphs = ListGraphs(newFileGraphName); + var existingGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key); + var existingExtractedGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key, true); var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs); - var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingGraphs); + var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingExtractedGraphs); return recentDataExtractedVersion != null && recentDataVersion != null - && recentDataExtractedVersion.AbsoluteUri.Contains(recentDataVersion.AbsoluteUri) - && recentDataExtractedVersion.AbsoluteUri != recentDataVersion.AbsoluteUri; + && recentDataExtractedVersion.Contains(recentDataVersion) + && recentDataExtractedVersion != recentDataVersion; } private async Task<MetadataOutput> ExtractMetadata(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions) @@ -195,9 +155,9 @@ public class CoscineMetadataExtractor : IMetadataExtractor var extractedOutputs = await _apiClient.PostMetadataExtractorWorkerAsync( givenStream, $"{resourceId}/{entry.Key.Replace("\\", "/")}", - null, - entry.Created?.ToString("o", CultureInfo.InvariantCulture), - entry.Modified?.ToString("o", CultureInfo.InvariantCulture) + null!, + entry.Created?.ToString("o", CultureInfo.InvariantCulture)!, + entry.Modified?.ToString("o", CultureInfo.InvariantCulture)! ); return extractedOutputs[0]; @@ -221,9 +181,8 @@ public class CoscineMetadataExtractor : IMetadataExtractor newFileGraphNameAddon += "/"; } - var existingGraphs = ListGraphs(newFileGraphNameAddon); - var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs); - var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs); + var recentDataVersion = _rdfStoreConnector.GetDataId(resourceId, entry.Key); + var recentMetadataVersion = _rdfStoreConnector.GetMetadataId(resourceId, entry.Key); await CreateHashData(resourceId, entry, resourceTypeDefinition, resourceTypeOptions, newFileGraphNameAddon, recentDataVersion); @@ -232,14 +191,14 @@ public class CoscineMetadataExtractor : IMetadataExtractor throw new NullReferenceException("The recent data version is null and can't be used."); } - var recentDataExtractedVersion = new Uri(recentDataVersion.AbsoluteUri + "&extracted=true"); + var recentDataExtractedVersion = new Uri(recentDataVersion + "&extracted=true"); if (recentMetadataVersion is null) { throw new NullReferenceException("The recent metadata version is null and can't be used."); } - var recentMetadataExtractedVersion = new Uri(recentMetadataVersion.AbsoluteUri + "&extracted=true"); + var recentMetadataExtractedVersion = new Uri(recentMetadataVersion + "&extracted=true"); var tripleStore = new TripleStore(); tripleStore.LoadFromString(extractedMetadata.Metadata, new TriGParser(TriGSyntax.Recommendation)); @@ -248,57 +207,22 @@ public class CoscineMetadataExtractor : IMetadataExtractor GraphStorer.StoreGraphs(tripleStore.Graphs, _rdfStoreConnector); - var trellisGraph = _rdfStoreConnector.GetGraph(trellisGraphUri); - var triples = new List<Triple>(); - - AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentDataExtractedVersion.AbsoluteUri, triples); - AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentMetadataExtractedVersion.AbsoluteUri, triples); - GraphStorer.AddToGraph(trellisGraph, triples, _rdfStoreConnector); - - var newDataFileGraphName = $"{newFileGraphName}/@type=data"; - var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata"; - - var dataGraph = CreateOrGetGraph(newDataFileGraphName); - var metadataGraph = CreateOrGetGraph(newMetadataFileGraphName); - - dataGraph.Assert(new Triple( - dataGraph.CreateUriNode(new Uri(newDataFileGraphName)), - dataGraph.CreateUriNode(new Uri(dcatdistributionUrl)), - dataGraph.CreateUriNode(recentDataExtractedVersion) - )); - dataGraph.Assert(new Triple( - dataGraph.CreateUriNode(recentDataExtractedVersion), - dataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)), - dataGraph.CreateLiteralNode(metadataExtractorVersion) - )); - - metadataGraph.Assert(new Triple( - metadataGraph.CreateUriNode(new Uri(newMetadataFileGraphName)), - metadataGraph.CreateUriNode(new Uri(dcatdistributionUrl)), - metadataGraph.CreateUriNode(recentMetadataExtractedVersion) - )); - metadataGraph.Assert(new Triple( - metadataGraph.CreateUriNode(recentMetadataExtractedVersion), - metadataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)), - metadataGraph.CreateLiteralNode(metadataExtractorVersion) - )); - metadataGraph.Assert(new Triple( - metadataGraph.CreateUriNode(recentMetadataVersion), - metadataGraph.CreateUriNode(new Uri("http://purl.org/fdp/fdp-o#isMetadataOf")), - metadataGraph.CreateUriNode(recentDataVersion) - )); - - var provenanceGraphs = new List<IGraph> { dataGraph, metadataGraph }; - GraphStorer.StoreGraphs(provenanceGraphs, _rdfStoreConnector); + GraphStorer.StoreGraphs( + _metadataGraphsCreator.UpdateExtractionGraphs( + resourceId, + entry.Key, + recentDataVersion, + recentMetadataVersion, + metadataExtractorVersion + ), + _rdfStoreConnector); } - private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, Uri? recentDataVersion) + private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, string? recentDataVersion) { var dataGraphName = $"{newFileGraphNameAddon}@type=data"; var dataGraph = CreateOrGetGraph(dataGraphName); - var hashTriples = new List<Triple>(); - var loadedEntry = await resourceTypeDefinition.LoadEntry(resourceId, entry.Key, resourceTypeOptions); if (loadedEntry is null) @@ -306,25 +230,16 @@ public class CoscineMetadataExtractor : IMetadataExtractor throw new NullReferenceException("The resulting stream of the loaded entry is null, when trying to hash the data."); } - var sha512Hash = Convert.ToBase64String(HashUtil.HashData(loadedEntry, HashAlgorithmName.SHA512)); - - var dataGraphId = recentDataVersion; - var hashGraphId = new Uri($"{dataGraphId?.AbsoluteUri}&hash={Guid.NewGuid()}"); - - var dataGraphSubject = dataGraph.CreateUriNode(dataGraphId); - var hashSubject = dataGraph.CreateUriNode(hashGraphId); + var defaultHash = Convert.ToBase64String(HashUtil.HashData(loadedEntry)); - hashTriples.Add(new Triple(dataGraphSubject, - dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashType")), - hashSubject)); - hashTriples.Add(new Triple(hashSubject, - dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashFunction")), - dataGraph.CreateLiteralNode("SHA512"))); - hashTriples.Add(new Triple(hashSubject, - dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashValue")), - dataGraph.CreateLiteralNode(sha512Hash, new Uri("http://www.w3.org/2001/XMLSchema#hexBinary")))); + if (recentDataVersion is null) + { + return; + } - GraphStorer.AddToGraph(dataGraph, hashTriples, _rdfStoreConnector); + GraphStorer.AddToGraph(dataGraph, HashUtil.CreateHashTriples( + dataGraph, new Uri(recentDataVersion), defaultHash + ), _rdfStoreConnector); } private static void FormatResultMetadata(TripleStore tripleStore, Uri dataExtractGraph, Uri metadataExtractGraph) @@ -347,30 +262,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor } } - private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples) - { - var setGraphNode = trellisGraph.CreateUriNode(new Uri(graphUri)); - var setThePartNode = trellisGraph.CreateUriNode(new Uri(thePartUri)); - var triple = new Triple( - setGraphNode, - trellisGraph.CreateUriNode(new Uri(partOfUri)), - setThePartNode - ); - if (!trellisGraph.ContainsTriple(triple)) - { - triples.Add(triple); - trellisGraph.Assert(triple); - var assignmentTriple = new Triple( - setGraphNode, - trellisGraph.CreateUriNode(new Uri(aUri)), - trellisGraph.CreateUriNode(new Uri(ldpAssignment)) - ); - triples.Add(assignmentTriple); - trellisGraph.Assert(assignmentTriple); - AddModifiedDate(trellisGraph, graphUri, triples); - } - } - private IGraph CreateOrGetGraph(string graphUrl) { var entryAlreadyExists = _rdfStoreConnector.HasGraph(graphUrl); @@ -382,22 +273,4 @@ public class CoscineMetadataExtractor : IMetadataExtractor }; } - private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples) - { - var dcTermsModifiedNode = graph.CreateUriNode(new Uri(dctermsModifiedUri)); - var rootNode = graph.CreateUriNode(new Uri(root)); - if (!graph.GetTriplesWithSubjectPredicate(rootNode, dcTermsModifiedNode).Any()) - { - var triple = new Triple( - rootNode, - dcTermsModifiedNode, - graph.CreateLiteralNode( - DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture), - new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime) - ) - ); - triples.Add(triple); - graph.Assert(triple); - } - } } \ No newline at end of file diff --git a/src/MetadataExtractorCron/Util/GraphStorer.cs b/src/MetadataExtractorCron/Util/GraphStorer.cs index 438552b9ff6307e7a4a56ced6048ca536122bc5e..3e46ba84d1ac7d924851e0ca9d70fbb75289069e 100644 --- a/src/MetadataExtractorCron/Util/GraphStorer.cs +++ b/src/MetadataExtractorCron/Util/GraphStorer.cs @@ -9,32 +9,14 @@ public static class GraphStorer { foreach (var graphUri in graphUris) { - Console.WriteLine($" ({graphUri.BaseUri})"); - - if (rdfStoreConnector.HasGraph(graphUri.BaseUri)) - { - Console.WriteLine($" - Graph {graphUri.BaseUri} exists"); - - // Clear the existing graph from the store - rdfStoreConnector.ClearGraph(graphUri.BaseUri); - Console.WriteLine($" - Cleared Graph {graphUri.BaseUri}"); - } - - // Chunking since the size otherwise can be too large - foreach (var triples in graphUri.Triples.Chunk(100)) - { - rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graphUri.BaseUri, triples, Enumerable.Empty<Triple>()); - } - - Console.WriteLine($" - Graph {graphUri.BaseUri} added successfully"); - Console.WriteLine(); + rdfStoreConnector.AddGraph(graphUri); } } public static void AddToGraph(IGraph graph, IEnumerable<Triple> triples, RdfStoreConnector rdfStoreConnector) { Console.WriteLine($" - Adding Triples to {graph.BaseUri}"); - rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graph.BaseUri, triples, Enumerable.Empty<Triple>()); + rdfStoreConnector.AddToGraph(graph, triples); Console.WriteLine($" - Triples added to Graph {graph.BaseUri} successfully"); Console.WriteLine(); } diff --git a/src/MetadataExtractorCron/Util/HashUtil.cs b/src/MetadataExtractorCron/Util/HashUtil.cs deleted file mode 100644 index 315b2f8f4656fb1bca544033c95a241da1923297..0000000000000000000000000000000000000000 --- a/src/MetadataExtractorCron/Util/HashUtil.cs +++ /dev/null @@ -1,29 +0,0 @@ -using System.Security.Cryptography; - -namespace MetadataExtractorCron.Util; - -public static class HashUtil -{ - private static HashAlgorithm GetHashAlgorithm(HashAlgorithmName hashAlgorithmName) - { - if (hashAlgorithmName == HashAlgorithmName.MD5) - return MD5.Create(); - if (hashAlgorithmName == HashAlgorithmName.SHA1) - return SHA1.Create(); - if (hashAlgorithmName == HashAlgorithmName.SHA256) - return SHA256.Create(); - if (hashAlgorithmName == HashAlgorithmName.SHA384) - return SHA384.Create(); - if (hashAlgorithmName == HashAlgorithmName.SHA512) - return SHA512.Create(); - - throw new CryptographicException($"Unknown hash algorithm \"{hashAlgorithmName.Name}\"."); - } - - public static byte[] HashData(Stream data, - HashAlgorithmName hashAlgorithm) - { - using var hashAlgorithmObject = GetHashAlgorithm(hashAlgorithm); - return hashAlgorithmObject.ComputeHash(data); - } -} \ No newline at end of file diff --git a/src/MetadataExtractorCron/Util/MetadataGraphsCreator.cs b/src/MetadataExtractorCron/Util/MetadataGraphsCreator.cs deleted file mode 100644 index afbfe1aeef8db2acc4825e19fa0c03bf49b452b5..0000000000000000000000000000000000000000 --- a/src/MetadataExtractorCron/Util/MetadataGraphsCreator.cs +++ /dev/null @@ -1,319 +0,0 @@ -using Coscine.Metadata; -using Coscine.ResourceTypes.Base.Models; -using System.Globalization; -using VDS.RDF; -using VDS.RDF.Parsing; -using VDS.RDF.Query; - -namespace MetadataExtractorCron.Util; - -/// <summary> -/// Derived from MetadataMigrator -/// </summary> -public class MetadataGraphsCreator -{ - private const string partOfUri = "http://purl.org/dc/terms/isPartOf"; - private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; - private const string basicContainerUri = "http://www.w3.org/ns/ldp#BasicContainer"; - private const string nonRdfSourceUri = "http://www.w3.org/ns/ldp#NonRDFSource"; - private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource"; - - private const string dcatcatalogUri = "http://www.w3.org/ns/dcat#catalog"; - private const string dcatCatalogClassUri = "http://www.w3.org/ns/dcat#Catalog"; - - private const string dctermsIdentifierUri = "http://purl.org/dc/terms/identifier"; - private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified"; - - private const string fdpMetadataServiceUri = "http://purl.org/fdp/fdp-o#MetadataService"; - private const string fdphasMetadataUri = "http://purl.org/fdp/fdp-o#hasMetadata"; - - private const string provEntityUri = "http://www.w3.org/ns/prov#Entity"; - private const string provGeneratedAtTimeUri = "http://www.w3.org/ns/prov#generatedAtTime"; - private const string provWasRevisionOfUri = "http://www.w3.org/ns/prov#wasRevisionOfNode"; - - private const string ldpDescribedByUri = "http://www.w3.org/ns/ldp#describedBy"; - - private const string resourceUrlPrefix = "https://purl.org/coscine/resources"; - - private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged"; - - private RdfStoreConnector RdfStoreConnector { get; } - - public MetadataGraphsCreator(RdfStoreConnector rdfStoreConnector) - { - RdfStoreConnector = rdfStoreConnector; - } - - public void CreateGraphs(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos) - { - var trellisGraph = RdfStoreConnector.GetGraph(trellisGraphUri); - - var graphs = new List<IGraph>(); - var triples = new List<Triple>(); - - var resourceGraphName = $"{resourceUrlPrefix}/{resourceId}"; - - var fileGraphs = fileInfos.Select((entry) => - { - var entryGraphName = $"{resourceGraphName}/{entry.Key}"; - if (!entryGraphName.EndsWith("/")) - { - entryGraphName += "/"; - } - return new Uri(entryGraphName); - }); - - var newFileGraphName = $"{resourceGraphName}/{entry.Key}"; - Console.WriteLine($"Migrating {newFileGraphName}"); - - var version = VersionUtil.GetNewVersion(); - - var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata"; - var newDataFileGraphName = $"{newFileGraphName}/@type=data"; - var newMetadataVersionFileGraphName = $"{newFileGraphName}/@type=metadata&version={version}"; - var newDataVersionFileGraphName = $"{newFileGraphName}/@type=data&version={version}"; - - var newFileGraph = CreateOrGetGraph(newFileGraphName); - var fileNode = newFileGraph.CreateUriNode(new Uri(newFileGraphName)); - graphs.Add(newFileGraph); - - // Set relation to resource, if a plain file in no folder - if (!entry.Key.Any((character) => character == '/')) - { - AddToTrellis(trellisGraph, basicContainerUri, resourceGraphName, newFileGraphName, triples); - } - - newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri)))); - newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(fdpMetadataServiceUri)))); - AddFilesToAFolder(trellisGraph, fileGraphs, new Uri(newFileGraphName), newFileGraph, newFileGraphName, triples); - - var metadataFileGraph = SetMetadataGraph(trellisGraph, graphs, newMetadataFileGraphName, newFileGraph, newFileGraphName, triples); - var dataFileGraph = SetDataGraph(trellisGraph, graphs, newDataFileGraphName, newFileGraph, newFileGraphName, metadataFileGraph.BaseUri.AbsoluteUri, triples); - - var existingGraphs = ListGraphs(newFileGraphName + "/"); - - SetDataVersionGraph(graphs, newDataVersionFileGraphName, dataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples); - SetMetadataVersionGraph(graphs, newMetadataVersionFileGraphName, metadataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples); - - GraphStorer.StoreGraphs(graphs, RdfStoreConnector); - GraphStorer.AddToGraph(trellisGraph, triples, RdfStoreConnector); - } - - private static void AddFilesToAFolder(IGraph trellisGraph, IEnumerable<Uri> fileGraphs, Uri fileGraph, IGraph newFileGraph, string fileUri, ICollection<Triple> triples) - { - // Add all files to a folder - foreach (var otherFileGraph in fileGraphs) - { - // TODO: Deal with multiple levels of files - if (otherFileGraph.AbsoluteUri != fileGraph.AbsoluteUri - && otherFileGraph.AbsoluteUri.Contains(fileGraph.AbsoluteUri + "/") - && !otherFileGraph.AbsoluteUri.Contains("&data") - && !otherFileGraph.AbsoluteUri.Contains("?type=") - && !otherFileGraph.AbsoluteUri.Contains("&type=") - && !otherFileGraph.AbsoluteUri.Contains("@type=")) - { - var otherFileNode = newFileGraph.CreateUriNode(otherFileGraph); - newFileGraph.Assert(new Triple(newFileGraph.CreateUriNode(new Uri(fileUri)), newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), otherFileNode)); - AddToTrellis(trellisGraph, basicContainerUri, fileUri, otherFileGraph.AbsoluteUri, triples); - } - } - } - - private IGraph SetMetadataGraph(IGraph trellisGraph, List<IGraph> graphs, string newMetadataFileGraphName, IGraph newFileGraph, string fileUri, ICollection<Triple> triples) - { - var metadataFileNode = newFileGraph.CreateUriNode(new Uri(newMetadataFileGraphName)); - var metadataFileGraph = CreateOrGetGraph(newMetadataFileGraphName); - graphs.Add(metadataFileGraph); - AddToTrellis(trellisGraph, rdfSourceUri, fileUri, newMetadataFileGraphName, triples); - var fileNode = newFileGraph.CreateUriNode(new Uri(fileUri)); - newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), metadataFileNode)); - newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(fdphasMetadataUri)), metadataFileNode)); - metadataFileGraph.Assert(new Triple( - Tools.CopyNode(metadataFileNode, metadataFileGraph), - metadataFileGraph.CreateUriNode(new Uri(aUri)), - metadataFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri)) - )); - return metadataFileGraph; - } - - private IGraph SetDataGraph(IGraph trellisGraph, List<IGraph> graphs, string newDataFileGraphName, IGraph newFileGraph, string fileUri, string metadataFileUri, ICollection<Triple> triples) - { - var dataFileNode = newFileGraph.CreateUriNode(new Uri(newDataFileGraphName)); - var dataFileGraph = CreateOrGetGraph(newDataFileGraphName); - graphs.Add(dataFileGraph); - AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, newDataFileGraphName, triples); - var fileNode = newFileGraph.CreateUriNode(new Uri(fileUri)); - newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), dataFileNode)); - dataFileGraph.Assert(new Triple(Tools.CopyNode(dataFileNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(aUri)), dataFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri)))); - dataFileGraph.Assert(new Triple(Tools.CopyNode(dataFileNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(ldpDescribedByUri)), dataFileGraph.CreateUriNode(new Uri(metadataFileUri)))); - return dataFileGraph; - } - - private void SetDataVersionGraph(List<IGraph> graphs, string newDataVersionFileGraphName, IGraph dataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples) - { - var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs); - - IGraph currentDataVersionGraph; - if (recentDataVersion == null) - { - currentDataVersionGraph = new Graph() - { - BaseUri = new Uri(newDataVersionFileGraphName), - }; - } - else - { - currentDataVersionGraph = RdfStoreConnector.GetGraph(recentDataVersion); - } - var currentDataVersionNode = currentDataVersionGraph.CreateUriNode(currentDataVersionGraph.BaseUri); - currentDataVersionGraph.Assert(new Triple(currentDataVersionNode, currentDataVersionGraph.CreateUriNode(new Uri(dctermsIdentifierUri)), currentDataVersionGraph.CreateLiteralNode( - currentDataVersionGraph.BaseUri.AbsoluteUri, - new Uri(XmlSpecsHelper.XmlSchemaDataTypeString) - ))); - - // PROV Info - var provTriple = new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(aUri)), dataFileGraph.CreateUriNode(new Uri(provEntityUri))); - if (!dataFileGraph.ContainsTriple(provTriple)) - { - dataFileGraph.Assert(provTriple); - dataFileGraph.Assert(new Triple( - dataFileGraph.CreateUriNode(dataFileGraph.BaseUri), - dataFileGraph.CreateUriNode(new Uri("http://www.w3.org/ns/dcat#dataset")), - Tools.CopyNode(currentDataVersionNode, dataFileGraph) - )); - dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), dataFileGraph.CreateLiteralNode( - DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture), - new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime) - ))); - AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, currentDataVersionGraph.BaseUri.AbsoluteUri, triples); - } - if (recentDataVersion != null && recentDataVersion.AbsoluteUri != currentDataVersionGraph.BaseUri.AbsoluteUri) - { - var recentDataVersionNode = dataFileGraph.CreateUriNode(recentDataVersion); - dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentDataVersionNode)); - } - - graphs.Add(currentDataVersionGraph); - } - - private void SetMetadataVersionGraph(List<IGraph> graphs, string newMetadataVersionFileGraphName, IGraph metadataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples) - { - var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs); - - IGraph currentMetadataVersionGraph; - if (recentMetadataVersion == null) - { - currentMetadataVersionGraph = new Graph() - { - BaseUri = new Uri(newMetadataVersionFileGraphName), - }; - } - else - { - currentMetadataVersionGraph = RdfStoreConnector.GetGraph(recentMetadataVersion); - } - var currentMetadataVersionNode = currentMetadataVersionGraph.CreateUriNode(currentMetadataVersionGraph.BaseUri); - - // PROV Info - var provTriple = new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(aUri)), metadataFileGraph.CreateUriNode(new Uri(provEntityUri))); - if (!metadataFileGraph.ContainsTriple(provTriple)) - { - metadataFileGraph.Assert(provTriple); - metadataFileGraph.Assert(new Triple( - metadataFileGraph.CreateUriNode(metadataFileGraph.BaseUri), - metadataFileGraph.CreateUriNode(new Uri("http://purl.org/fdp/fdp-o#hasMetadata")), - Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph) - )); - metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), metadataFileGraph.CreateLiteralNode( - DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture), - new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime) - ))); - AddToTrellis(trellisGraph, rdfSourceUri, fileUri, currentMetadataVersionGraph.BaseUri.AbsoluteUri, triples); - } - if (recentMetadataVersion != null && recentMetadataVersion.AbsoluteUri != currentMetadataVersionGraph.BaseUri.AbsoluteUri) - { - var recentMetadataVersionNode = metadataFileGraph.CreateUriNode(recentMetadataVersion); - metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentMetadataVersionNode)); - } - - graphs.Add(currentMetadataVersionGraph); - } - - private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples) - { - var setGraphNode = trellisGraph.CreateUriNode(new Uri(graphUri)); - var setThePartNode = trellisGraph.CreateUriNode(new Uri(thePartUri)); - var triple = new Triple( - setGraphNode, - trellisGraph.CreateUriNode(new Uri(partOfUri)), - setThePartNode - ); - if (!trellisGraph.ContainsTriple(triple)) - { - triples.Add(triple); - trellisGraph.Assert(triple); - var assignmentTriple = new Triple( - setGraphNode, - trellisGraph.CreateUriNode(new Uri(aUri)), - trellisGraph.CreateUriNode(new Uri(ldpAssignment)) - ); - triples.Add(assignmentTriple); - trellisGraph.Assert(assignmentTriple); - AddModifiedDate(trellisGraph, graphUri, triples); - } - } - - private IGraph CreateOrGetGraph(string graphUrl) - { - var entryAlreadyExists = RdfStoreConnector.HasGraph(graphUrl); - return entryAlreadyExists - ? RdfStoreConnector.GetGraph(graphUrl) - : new Graph() - { - BaseUri = new Uri(graphUrl) - }; - } - - public IEnumerable<Uri> ListGraphs(string id) - { - var cmdString = new SparqlParameterizedString - { - CommandText = @"SELECT DISTINCT ?g - WHERE { GRAPH ?g { ?s ?p ?o } - FILTER(contains(str(?g), @graph)) }" - }; - cmdString.SetLiteral("graph", id); - - var resultSet = RdfStoreConnector.QueryEndpoint.QueryWithResultSet(cmdString.ToString()); - - var graphs = new List<Uri>(); - foreach (SparqlResult r in resultSet) - { - var uriNode = r.Value("g") as UriNode; - if (uriNode is not null) - { - graphs.Add(uriNode.Uri); - } - } - return graphs; - } - - private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples) - { - var dcTermsModifiedNode = graph.CreateUriNode(new Uri(dctermsModifiedUri)); - var rootNode = graph.CreateUriNode(new Uri(root)); - if (!graph.GetTriplesWithSubjectPredicate(rootNode, dcTermsModifiedNode).Any()) - { - var triple = new Triple( - rootNode, - dcTermsModifiedNode, - graph.CreateLiteralNode( - DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture), - new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime) - ) - ); - triples.Add(triple); - graph.Assert(triple); - } - } -} \ No newline at end of file diff --git a/src/MetadataExtractorCron/VersionUtil.cs b/src/MetadataExtractorCron/VersionUtil.cs deleted file mode 100644 index 6c0105f5c1099f6c9b5aa9d6f0d92e000402f2e8..0000000000000000000000000000000000000000 --- a/src/MetadataExtractorCron/VersionUtil.cs +++ /dev/null @@ -1,53 +0,0 @@ -using System.Web; - -namespace MetadataExtractorCron; - -public static class VersionUtil -{ - public static Uri? GetRecentVersion(IEnumerable<Uri> graphUris, string? filter = null, bool notFilterExtracted = true) - { - var currentBest = graphUris.FirstOrDefault(); - var currentBestVersion = 0L; - foreach (var graphUri in graphUris) - { - var queryDictionary = HttpUtility.ParseQueryString(new Uri(graphUri.ToString().Replace("@", "?")).Query); - var version = queryDictionary["version"]; - if (version == null || !long.TryParse(version, out long longVersion)) - { - continue; - } - if (longVersion > currentBestVersion - && (filter == null || queryDictionary["type"] == filter) - && - ((notFilterExtracted && queryDictionary["extracted"] == null) - || (!notFilterExtracted && queryDictionary["extracted"] != null)) - ) - { - currentBestVersion = longVersion; - currentBest = graphUri; - } - } - return currentBest; - } - - public static Uri? GetRecentDataExtractedVersion(IEnumerable<Uri> graphUris) - { - return GetRecentVersion(graphUris, "data", false); - } - - public static Uri? GetRecentDataVersion(IEnumerable<Uri> graphUris) - { - return GetRecentVersion(graphUris, "data"); - } - - public static Uri? GetRecentMetadataVersion(IEnumerable<Uri> graphUris) - { - return GetRecentVersion(graphUris, "metadata"); - } - - public static long GetNewVersion() - { - // UTC Timestamp - return long.Parse(Convert.ToString((int)DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1)).TotalSeconds)); - } -} \ No newline at end of file