Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
Loading items

Target

Select target project
  • coscine/backend/scripts/metadataextractorcron
1 result
Select Git revision
Loading items
Show changes
Commits on Source (4)
......@@ -6,12 +6,12 @@ using Coscine.ResourceTypes.Base;
using Coscine.ResourceTypes.Base.Models;
using Org.OpenAPITools.Api;
using Org.OpenAPITools.Model;
using VDS.RDF.Query;
using VDS.RDF;
using MetadataExtractorCron.Util;
using VDS.RDF.Parsing;
using System.Globalization;
using System.Security.Cryptography;
using Coscine.Metadata.Util;
namespace MetadataExtractorCron.Extractors;
......@@ -25,14 +25,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor
private readonly RdfStoreConnector _rdfStoreConnector;
private readonly MetadataGraphsCreator _metadataGraphsCreator;
private const string metadataExtractionVersionUrl = "https://purl.org/coscine/terms/metatadataextraction#version";
private const string dcatdistributionUrl = "http://www.w3.org/ns/dcat#distribution";
private const string partOfUri = "http://purl.org/dc/terms/isPartOf";
private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified";
private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource";
private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged";
public CoscineMetadataExtractor()
{
_configuration = new ConsulConfiguration();
......@@ -72,7 +64,7 @@ public class CoscineMetadataExtractor : IMetadataExtractor
foreach (var file in fileInfos.Where((fileInfo) => fileInfo.HasBody))
{
if (file.BodyBytes > 16 * 1000 * 1000)
if (file.BodyBytes > VersionUtil.DetectionByteLimit)
{
Console.WriteLine($"Skipping {file.Key} on {resourceId} since it has a too large byte size");
continue;
......@@ -114,63 +106,31 @@ public class CoscineMetadataExtractor : IMetadataExtractor
private void CreateMetadataSetsIfDontExist(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos)
{
var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}";
var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
if (!newFileGraphName.EndsWith("/"))
{
newFileGraphName += "/";
}
var existingGraphs = ListGraphs(newFileGraphName);
var existingGraphs = _rdfStoreConnector.GetMetadataIds(resourceId, entry.Key);
if (!existingGraphs.Any())
{
Console.WriteLine($"Creating graphs for {newFileGraphName} since they did not exist before!");
_metadataGraphsCreator.CreateGraphs(resourceId, entry, fileInfos);
}
}
private IEnumerable<Uri> ListGraphs(string id)
{
var cmdString = new SparqlParameterizedString
{
CommandText = @"SELECT DISTINCT ?g
WHERE { GRAPH ?g { ?s ?p ?o }
FILTER(contains(str(?g), @graph)) }"
};
cmdString.SetLiteral("graph", id);
var resultSet = _rdfStoreConnector.QueryEndpoint.QueryWithResultSet(cmdString.ToString());
var graphs = new List<Uri>();
foreach (SparqlResult r in resultSet)
{
var uriNode = r.Value("g") as UriNode;
if (uriNode is not null)
{
graphs.Add(uriNode.Uri);
}
Console.WriteLine($"Creating graphs for {resourceId}, {entry.Key} since they did not exist before!");
GraphStorer.StoreGraphs(_metadataGraphsCreator.CreateGraphs(
resourceId,
entry.Key,
true,
true
), _rdfStoreConnector);
}
return graphs;
}
private bool HasCurrentMetadataExtracted(string resourceId, ResourceEntry entry)
{
var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}";
var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
if (!newFileGraphName.EndsWith("/"))
{
newFileGraphName += "/";
}
var existingGraphs = ListGraphs(newFileGraphName);
var existingGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key);
var existingExtractedGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key, true);
var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);
var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingGraphs);
var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingExtractedGraphs);
return
recentDataExtractedVersion != null
&& recentDataVersion != null
&& recentDataExtractedVersion.AbsoluteUri.Contains(recentDataVersion.AbsoluteUri)
&& recentDataExtractedVersion.AbsoluteUri != recentDataVersion.AbsoluteUri;
&& recentDataExtractedVersion.Contains(recentDataVersion)
&& recentDataExtractedVersion != recentDataVersion;
}
private async Task<MetadataOutput> ExtractMetadata(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions)
......@@ -195,9 +155,9 @@ public class CoscineMetadataExtractor : IMetadataExtractor
var extractedOutputs = await _apiClient.PostMetadataExtractorWorkerAsync(
givenStream,
$"{resourceId}/{entry.Key.Replace("\\", "/")}",
null,
entry.Created?.ToString("o", CultureInfo.InvariantCulture),
entry.Modified?.ToString("o", CultureInfo.InvariantCulture)
null!,
entry.Created?.ToString("o", CultureInfo.InvariantCulture)!,
entry.Modified?.ToString("o", CultureInfo.InvariantCulture)!
);
return extractedOutputs[0];
......@@ -221,9 +181,8 @@ public class CoscineMetadataExtractor : IMetadataExtractor
newFileGraphNameAddon += "/";
}
var existingGraphs = ListGraphs(newFileGraphNameAddon);
var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);
var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs);
var recentDataVersion = _rdfStoreConnector.GetDataId(resourceId, entry.Key);
var recentMetadataVersion = _rdfStoreConnector.GetMetadataId(resourceId, entry.Key);
await CreateHashData(resourceId, entry, resourceTypeDefinition, resourceTypeOptions, newFileGraphNameAddon, recentDataVersion);
......@@ -232,14 +191,14 @@ public class CoscineMetadataExtractor : IMetadataExtractor
throw new NullReferenceException("The recent data version is null and can't be used.");
}
var recentDataExtractedVersion = new Uri(recentDataVersion.AbsoluteUri + "&extracted=true");
var recentDataExtractedVersion = new Uri(recentDataVersion + "&extracted=true");
if (recentMetadataVersion is null)
{
throw new NullReferenceException("The recent metadata version is null and can't be used.");
}
var recentMetadataExtractedVersion = new Uri(recentMetadataVersion.AbsoluteUri + "&extracted=true");
var recentMetadataExtractedVersion = new Uri(recentMetadataVersion + "&extracted=true");
var tripleStore = new TripleStore();
tripleStore.LoadFromString(extractedMetadata.Metadata, new TriGParser(TriGSyntax.Recommendation));
......@@ -248,57 +207,22 @@ public class CoscineMetadataExtractor : IMetadataExtractor
GraphStorer.StoreGraphs(tripleStore.Graphs, _rdfStoreConnector);
var trellisGraph = _rdfStoreConnector.GetGraph(trellisGraphUri);
var triples = new List<Triple>();
AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentDataExtractedVersion.AbsoluteUri, triples);
AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentMetadataExtractedVersion.AbsoluteUri, triples);
GraphStorer.AddToGraph(trellisGraph, triples, _rdfStoreConnector);
var newDataFileGraphName = $"{newFileGraphName}/@type=data";
var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata";
var dataGraph = CreateOrGetGraph(newDataFileGraphName);
var metadataGraph = CreateOrGetGraph(newMetadataFileGraphName);
dataGraph.Assert(new Triple(
dataGraph.CreateUriNode(new Uri(newDataFileGraphName)),
dataGraph.CreateUriNode(new Uri(dcatdistributionUrl)),
dataGraph.CreateUriNode(recentDataExtractedVersion)
));
dataGraph.Assert(new Triple(
dataGraph.CreateUriNode(recentDataExtractedVersion),
dataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)),
dataGraph.CreateLiteralNode(metadataExtractorVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(new Uri(newMetadataFileGraphName)),
metadataGraph.CreateUriNode(new Uri(dcatdistributionUrl)),
metadataGraph.CreateUriNode(recentMetadataExtractedVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(recentMetadataExtractedVersion),
metadataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)),
metadataGraph.CreateLiteralNode(metadataExtractorVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(recentMetadataVersion),
metadataGraph.CreateUriNode(new Uri("http://purl.org/fdp/fdp-o#isMetadataOf")),
metadataGraph.CreateUriNode(recentDataVersion)
));
var provenanceGraphs = new List<IGraph> { dataGraph, metadataGraph };
GraphStorer.StoreGraphs(provenanceGraphs, _rdfStoreConnector);
}
private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, Uri? recentDataVersion)
GraphStorer.StoreGraphs(
_metadataGraphsCreator.UpdateExtractionGraphs(
resourceId,
entry.Key,
recentDataVersion,
recentMetadataVersion,
metadataExtractorVersion
),
_rdfStoreConnector);
}
private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, string? recentDataVersion)
{
var dataGraphName = $"{newFileGraphNameAddon}@type=data";
var dataGraph = CreateOrGetGraph(dataGraphName);
var hashTriples = new List<Triple>();
var loadedEntry = await resourceTypeDefinition.LoadEntry(resourceId, entry.Key, resourceTypeOptions);
if (loadedEntry is null)
......@@ -306,25 +230,16 @@ public class CoscineMetadataExtractor : IMetadataExtractor
throw new NullReferenceException("The resulting stream of the loaded entry is null, when trying to hash the data.");
}
var sha512Hash = Convert.ToBase64String(HashUtil.HashData(loadedEntry, HashAlgorithmName.SHA512));
var dataGraphId = recentDataVersion;
var hashGraphId = new Uri($"{dataGraphId?.AbsoluteUri}&hash={Guid.NewGuid()}");
var dataGraphSubject = dataGraph.CreateUriNode(dataGraphId);
var hashSubject = dataGraph.CreateUriNode(hashGraphId);
var defaultHash = Convert.ToBase64String(HashUtil.HashData(loadedEntry));
hashTriples.Add(new Triple(dataGraphSubject,
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashType")),
hashSubject));
hashTriples.Add(new Triple(hashSubject,
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashFunction")),
dataGraph.CreateLiteralNode("SHA512")));
hashTriples.Add(new Triple(hashSubject,
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashValue")),
dataGraph.CreateLiteralNode(sha512Hash, new Uri("http://www.w3.org/2001/XMLSchema#hexBinary"))));
if (recentDataVersion is null)
{
return;
}
GraphStorer.AddToGraph(dataGraph, hashTriples, _rdfStoreConnector);
GraphStorer.AddToGraph(dataGraph, HashUtil.CreateHashTriples(
dataGraph, new Uri(recentDataVersion), defaultHash
), _rdfStoreConnector);
}
private static void FormatResultMetadata(TripleStore tripleStore, Uri dataExtractGraph, Uri metadataExtractGraph)
......@@ -347,30 +262,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor
}
}
private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples)
{
var setGraphNode = trellisGraph.CreateUriNode(new Uri(graphUri));
var setThePartNode = trellisGraph.CreateUriNode(new Uri(thePartUri));
var triple = new Triple(
setGraphNode,
trellisGraph.CreateUriNode(new Uri(partOfUri)),
setThePartNode
);
if (!trellisGraph.ContainsTriple(triple))
{
triples.Add(triple);
trellisGraph.Assert(triple);
var assignmentTriple = new Triple(
setGraphNode,
trellisGraph.CreateUriNode(new Uri(aUri)),
trellisGraph.CreateUriNode(new Uri(ldpAssignment))
);
triples.Add(assignmentTriple);
trellisGraph.Assert(assignmentTriple);
AddModifiedDate(trellisGraph, graphUri, triples);
}
}
private IGraph CreateOrGetGraph(string graphUrl)
{
var entryAlreadyExists = _rdfStoreConnector.HasGraph(graphUrl);
......@@ -382,22 +273,4 @@ public class CoscineMetadataExtractor : IMetadataExtractor
};
}
private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples)
{
var dcTermsModifiedNode = graph.CreateUriNode(new Uri(dctermsModifiedUri));
var rootNode = graph.CreateUriNode(new Uri(root));
if (!graph.GetTriplesWithSubjectPredicate(rootNode, dcTermsModifiedNode).Any())
{
var triple = new Triple(
rootNode,
dcTermsModifiedNode,
graph.CreateLiteralNode(
DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
)
);
triples.Add(triple);
graph.Assert(triple);
}
}
}
\ No newline at end of file
......@@ -5,7 +5,7 @@
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Version>0.1.4</Version></PropertyGroup>
<Version>0.1.5</Version></PropertyGroup>
<ItemGroup>
<PackageReference Include="Coscine.Database" Version="2.*-*" />
......
......@@ -9,32 +9,14 @@ public static class GraphStorer
{
foreach (var graphUri in graphUris)
{
Console.WriteLine($" ({graphUri.BaseUri})");
if (rdfStoreConnector.HasGraph(graphUri.BaseUri))
{
Console.WriteLine($" - Graph {graphUri.BaseUri} exists");
// Clear the existing graph from the store
rdfStoreConnector.ClearGraph(graphUri.BaseUri);
Console.WriteLine($" - Cleared Graph {graphUri.BaseUri}");
}
// Chunking since the size otherwise can be too large
foreach (var triples in graphUri.Triples.Chunk(100))
{
rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graphUri.BaseUri, triples, Enumerable.Empty<Triple>());
}
Console.WriteLine($" - Graph {graphUri.BaseUri} added successfully");
Console.WriteLine();
rdfStoreConnector.AddGraph(graphUri);
}
}
public static void AddToGraph(IGraph graph, IEnumerable<Triple> triples, RdfStoreConnector rdfStoreConnector)
{
Console.WriteLine($" - Adding Triples to {graph.BaseUri}");
rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graph.BaseUri, triples, Enumerable.Empty<Triple>());
rdfStoreConnector.AddToGraph(graph, triples);
Console.WriteLine($" - Triples added to Graph {graph.BaseUri} successfully");
Console.WriteLine();
}
......
using System.Security.Cryptography;
namespace MetadataExtractorCron.Util;
/// <summary>
/// Helpers for hashing the content of a stream with a caller-selected algorithm.
/// </summary>
public static class HashUtil
{
    /// <summary>
    /// Creates the hash algorithm implementation that belongs to the given name.
    /// </summary>
    /// <param name="hashAlgorithmName">MD5, SHA1, SHA256, SHA384 or SHA512.</param>
    /// <returns>A new algorithm instance; the caller is responsible for disposing it.</returns>
    /// <exception cref="CryptographicException">The name is not one of the supported algorithms.</exception>
    private static HashAlgorithm GetHashAlgorithm(HashAlgorithmName hashAlgorithmName)
    {
        // HashAlgorithmName equality is an ordinal comparison of its Name,
        // so matching on the name string selects the same algorithm.
        switch (hashAlgorithmName.Name)
        {
            case "MD5":
                return MD5.Create();
            case "SHA1":
                return SHA1.Create();
            case "SHA256":
                return SHA256.Create();
            case "SHA384":
                return SHA384.Create();
            case "SHA512":
                return SHA512.Create();
            default:
                throw new CryptographicException($"Unknown hash algorithm \"{hashAlgorithmName.Name}\".");
        }
    }

    /// <summary>
    /// Computes the hash of everything that can be read from <paramref name="data"/>.
    /// </summary>
    /// <param name="data">Stream to hash, read from its current position.</param>
    /// <param name="hashAlgorithm">Name of the algorithm to use.</param>
    /// <returns>The raw hash bytes.</returns>
    /// <exception cref="CryptographicException">The algorithm name is not supported.</exception>
    public static byte[] HashData(Stream data,
        HashAlgorithmName hashAlgorithm)
    {
        using (var hasher = GetHashAlgorithm(hashAlgorithm))
        {
            return hasher.ComputeHash(data);
        }
    }
}
\ No newline at end of file
using Coscine.Metadata;
using Coscine.ResourceTypes.Base.Models;
using System.Globalization;
using VDS.RDF;
using VDS.RDF.Parsing;
using VDS.RDF.Query;
namespace MetadataExtractorCron.Util;
/// <summary>
/// Derived from MetadataMigrator
/// </summary>
public class MetadataGraphsCreator
{
private const string partOfUri = "http://purl.org/dc/terms/isPartOf";
private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
private const string basicContainerUri = "http://www.w3.org/ns/ldp#BasicContainer";
private const string nonRdfSourceUri = "http://www.w3.org/ns/ldp#NonRDFSource";
private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource";
private const string dcatcatalogUri = "http://www.w3.org/ns/dcat#catalog";
private const string dcatCatalogClassUri = "http://www.w3.org/ns/dcat#Catalog";
private const string dctermsIdentifierUri = "http://purl.org/dc/terms/identifier";
private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified";
private const string fdpMetadataServiceUri = "http://purl.org/fdp/fdp-o#MetadataService";
private const string fdphasMetadataUri = "http://purl.org/fdp/fdp-o#hasMetadata";
private const string provEntityUri = "http://www.w3.org/ns/prov#Entity";
private const string provGeneratedAtTimeUri = "http://www.w3.org/ns/prov#generatedAtTime";
private const string provWasRevisionOfUri = "http://www.w3.org/ns/prov#wasRevisionOfNode";
private const string ldpDescribedByUri = "http://www.w3.org/ns/ldp#describedBy";
private const string resourceUrlPrefix = "https://purl.org/coscine/resources";
private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged";
private RdfStoreConnector RdfStoreConnector { get; }
/// <summary>
/// Creates a graph creator that reads from and writes to the given store connector.
/// </summary>
/// <param name="rdfStoreConnector">Connector used for all graph lookups and queries of this instance.</param>
public MetadataGraphsCreator(RdfStoreConnector rdfStoreConnector) => RdfStoreConnector = rdfStoreConnector;
/// <summary>
/// Builds the graph set for a single resource entry (file graph, metadata graph, data graph
/// and one version graph for each of the latter two), stores them and registers the new
/// graphs in the trellis graph.
/// </summary>
/// <param name="resourceId">Id of the resource; appended to <c>resourceUrlPrefix</c> to form the resource graph name.</param>
/// <param name="entry">Entry whose <c>Key</c> is appended to the resource graph name to form the file graph name.</param>
/// <param name="fileInfos">All entries considered when linking files below this entry (see <c>AddFilesToAFolder</c>).</param>
public void CreateGraphs(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos)
{
var trellisGraph = RdfStoreConnector.GetGraph(trellisGraphUri);
// Graphs collected here are written in one go at the end; "triples" collects
// only the additions to the trellis graph, which are written separately.
var graphs = new List<IGraph>();
var triples = new List<Triple>();
var resourceGraphName = $"{resourceUrlPrefix}/{resourceId}";
// Graph URI for every known entry, normalised to end with a slash.
// Note: the lambda parameter "entry" shadows the method parameter of the same name.
// The projection is deferred and is evaluated when AddFilesToAFolder enumerates it.
var fileGraphs = fileInfos.Select((entry) =>
{
var entryGraphName = $"{resourceGraphName}/{entry.Key}";
if (!entryGraphName.EndsWith("/"))
{
entryGraphName += "/";
}
return new Uri(entryGraphName);
});
var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
Console.WriteLine($"Migrating {newFileGraphName}");
// The same freshly generated version number is used for the data and the metadata version graph names.
var version = VersionUtil.GetNewVersion();
var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata";
var newDataFileGraphName = $"{newFileGraphName}/@type=data";
var newMetadataVersionFileGraphName = $"{newFileGraphName}/@type=metadata&version={version}";
var newDataVersionFileGraphName = $"{newFileGraphName}/@type=data&version={version}";
var newFileGraph = CreateOrGetGraph(newFileGraphName);
var fileNode = newFileGraph.CreateUriNode(new Uri(newFileGraphName));
graphs.Add(newFileGraph);
// Link the entry directly to the resource only when its key contains no '/',
// i.e. it is a plain file that is not inside a folder.
if (!entry.Key.Any((character) => character == '/'))
{
AddToTrellis(trellisGraph, basicContainerUri, resourceGraphName, newFileGraphName, triples);
}
// Type the file node as both a dcat:Catalog and an fdp-o:MetadataService.
newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri))));
newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(fdpMetadataServiceUri))));
AddFilesToAFolder(trellisGraph, fileGraphs, new Uri(newFileGraphName), newFileGraph, newFileGraphName, triples);
var metadataFileGraph = SetMetadataGraph(trellisGraph, graphs, newMetadataFileGraphName, newFileGraph, newFileGraphName, triples);
var dataFileGraph = SetDataGraph(trellisGraph, graphs, newDataFileGraphName, newFileGraph, newFileGraphName, metadataFileGraph.BaseUri.AbsoluteUri, triples);
// Graphs already in the store whose name contains "<file graph name>/"; the version
// helpers below use them to decide whether a version graph already exists.
var existingGraphs = ListGraphs(newFileGraphName + "/");
SetDataVersionGraph(graphs, newDataVersionFileGraphName, dataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples);
SetMetadataVersionGraph(graphs, newMetadataVersionFileGraphName, metadataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples);
// Persist the collected graphs first, then append the new trellis triples.
GraphStorer.StoreGraphs(graphs, RdfStoreConnector);
GraphStorer.AddToGraph(trellisGraph, triples, RdfStoreConnector);
}
/// <summary>
/// Links every graph in <paramref name="fileGraphs"/> that lies below the given folder graph
/// to that folder, both in <paramref name="newFileGraph"/> and in the trellis graph.
/// </summary>
/// <param name="trellisGraph">Trellis graph that receives the containment triples.</param>
/// <param name="fileGraphs">Candidate graph URIs.</param>
/// <param name="fileGraph">URI of the folder graph the candidates are compared against.</param>
/// <param name="newFileGraph">Graph that receives the catalog triple per matching candidate.</param>
/// <param name="fileUri">Folder URI used as subject of the catalog triple and as trellis parent.</param>
/// <param name="triples">Collector for triples newly added to the trellis graph.</param>
private static void AddFilesToAFolder(IGraph trellisGraph, IEnumerable<Uri> fileGraphs, Uri fileGraph, IGraph newFileGraph, string fileUri, ICollection<Triple> triples)
{
    foreach (var candidate in fileGraphs)
    {
        var candidateUri = candidate.AbsoluteUri;
        var folderUri = fileGraph.AbsoluteUri;

        // TODO: Deal with multiple levels of files
        // NOTE(review): callers pass candidates with a trailing slash, so the folder's own
        // "<uri>/" form also satisfies this test — confirm that this is intended.
        if (candidateUri == folderUri || !candidateUri.Contains(folderUri + "/"))
        {
            continue;
        }

        // Skip URIs that carry a data/type marker (typed or versioned sub-graphs).
        var carriesTypeMarker = candidateUri.Contains("&data")
            || candidateUri.Contains("?type=")
            || candidateUri.Contains("&type=")
            || candidateUri.Contains("@type=");
        if (carriesTypeMarker)
        {
            continue;
        }

        var candidateNode = newFileGraph.CreateUriNode(candidate);
        var folderNode = newFileGraph.CreateUriNode(new Uri(fileUri));
        var catalogPredicate = newFileGraph.CreateUriNode(new Uri(dcatcatalogUri));
        newFileGraph.Assert(new Triple(folderNode, catalogPredicate, candidateNode));

        AddToTrellis(trellisGraph, basicContainerUri, fileUri, candidateUri, triples);
    }
}
/// <summary>
/// Creates or loads the metadata graph of a file, links it from the file graph and
/// registers it in the trellis graph as an RDF source.
/// </summary>
/// <param name="trellisGraph">Trellis graph that receives the registration triples.</param>
/// <param name="graphs">Collector of graphs to be stored; the metadata graph is appended.</param>
/// <param name="newMetadataFileGraphName">Name (URI) of the metadata graph.</param>
/// <param name="newFileGraph">File graph that receives the links to the metadata graph.</param>
/// <param name="fileUri">URI of the file the metadata graph belongs to.</param>
/// <param name="triples">Collector for triples newly added to the trellis graph.</param>
/// <returns>The metadata graph.</returns>
private IGraph SetMetadataGraph(IGraph trellisGraph, List<IGraph> graphs, string newMetadataFileGraphName, IGraph newFileGraph, string fileUri, ICollection<Triple> triples)
{
    var metadataNodeInFileGraph = newFileGraph.CreateUriNode(new Uri(newMetadataFileGraphName));

    var metadataGraph = CreateOrGetGraph(newMetadataFileGraphName);
    graphs.Add(metadataGraph);
    AddToTrellis(trellisGraph, rdfSourceUri, fileUri, newMetadataFileGraphName, triples);

    // File graph: the file both catalogs the metadata graph and has it as metadata.
    var fileSubject = newFileGraph.CreateUriNode(new Uri(fileUri));
    var catalogLink = new Triple(
        fileSubject,
        newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)),
        metadataNodeInFileGraph);
    newFileGraph.Assert(catalogLink);
    var hasMetadataLink = new Triple(
        fileSubject,
        newFileGraph.CreateUriNode(new Uri(fdphasMetadataUri)),
        metadataNodeInFileGraph);
    newFileGraph.Assert(hasMetadataLink);

    // Metadata graph: type its own node as a dcat:Catalog.
    var typeTriple = new Triple(
        Tools.CopyNode(metadataNodeInFileGraph, metadataGraph),
        metadataGraph.CreateUriNode(new Uri(aUri)),
        metadataGraph.CreateUriNode(new Uri(dcatCatalogClassUri)));
    metadataGraph.Assert(typeTriple);

    return metadataGraph;
}
/// <summary>
/// Creates or loads the data graph of a file, links it from the file graph, points it at its
/// describing metadata graph and registers it in the trellis graph as a non-RDF source.
/// </summary>
/// <param name="trellisGraph">Trellis graph that receives the registration triples.</param>
/// <param name="graphs">Collector of graphs to be stored; the data graph is appended.</param>
/// <param name="newDataFileGraphName">Name (URI) of the data graph.</param>
/// <param name="newFileGraph">File graph that receives the catalog link to the data graph.</param>
/// <param name="fileUri">URI of the file the data graph belongs to.</param>
/// <param name="metadataFileUri">URI of the metadata graph that describes the data.</param>
/// <param name="triples">Collector for triples newly added to the trellis graph.</param>
/// <returns>The data graph.</returns>
private IGraph SetDataGraph(IGraph trellisGraph, List<IGraph> graphs, string newDataFileGraphName, IGraph newFileGraph, string fileUri, string metadataFileUri, ICollection<Triple> triples)
{
    var dataNodeInFileGraph = newFileGraph.CreateUriNode(new Uri(newDataFileGraphName));

    var dataGraph = CreateOrGetGraph(newDataFileGraphName);
    graphs.Add(dataGraph);
    AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, newDataFileGraphName, triples);

    // File graph: the file catalogs its data graph.
    var fileSubject = newFileGraph.CreateUriNode(new Uri(fileUri));
    var catalogPredicate = newFileGraph.CreateUriNode(new Uri(dcatcatalogUri));
    newFileGraph.Assert(new Triple(fileSubject, catalogPredicate, dataNodeInFileGraph));

    // Data graph: type its own node as a dcat:Catalog ...
    var typeTriple = new Triple(
        Tools.CopyNode(dataNodeInFileGraph, dataGraph),
        dataGraph.CreateUriNode(new Uri(aUri)),
        dataGraph.CreateUriNode(new Uri(dcatCatalogClassUri)));
    dataGraph.Assert(typeTriple);

    // ... and state which metadata graph describes it.
    var describedByTriple = new Triple(
        Tools.CopyNode(dataNodeInFileGraph, dataGraph),
        dataGraph.CreateUriNode(new Uri(ldpDescribedByUri)),
        dataGraph.CreateUriNode(new Uri(metadataFileUri)));
    dataGraph.Assert(describedByTriple);

    return dataGraph;
}
/// <summary>
/// Determines the current data version graph (the most recent existing one, or a new empty
/// graph when none exists), adds it to <paramref name="graphs"/> and records its provenance
/// in <paramref name="dataFileGraph"/>.
/// </summary>
/// <param name="graphs">Collector of graphs to be stored; the version graph is appended.</param>
/// <param name="newDataVersionFileGraphName">Name used for the version graph when no data version exists yet.</param>
/// <param name="dataFileGraph">Data graph that receives the provenance triples.</param>
/// <param name="existingGraphs">Graph URIs searched for an existing data version.</param>
/// <param name="trellisGraph">Trellis graph that receives the registration triples.</param>
/// <param name="fileUri">URI of the file; used as parent when registering the version graph in trellis.</param>
/// <param name="triples">Collector for triples newly added to the trellis graph.</param>
private void SetDataVersionGraph(List<IGraph> graphs, string newDataVersionFileGraphName, IGraph dataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples)
{
var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);
IGraph currentDataVersionGraph;
if (recentDataVersion == null)
{
// No data version known yet: start a fresh graph under the new version name.
currentDataVersionGraph = new Graph()
{
BaseUri = new Uri(newDataVersionFileGraphName),
};
}
else
{
// Reuse the most recent version graph from the store.
currentDataVersionGraph = RdfStoreConnector.GetGraph(recentDataVersion);
}
var currentDataVersionNode = currentDataVersionGraph.CreateUriNode(currentDataVersionGraph.BaseUri);
// The version graph identifies itself by its own URI (dcterms:identifier, typed as xsd:string).
currentDataVersionGraph.Assert(new Triple(currentDataVersionNode, currentDataVersionGraph.CreateUriNode(new Uri(dctermsIdentifierUri)), currentDataVersionGraph.CreateLiteralNode(
currentDataVersionGraph.BaseUri.AbsoluteUri,
new Uri(XmlSpecsHelper.XmlSchemaDataTypeString)
)));
// PROV Info
// The "<version> a prov:Entity" triple doubles as the marker for "provenance already
// recorded": dataset link, generation time and trellis registration are added only once.
var provTriple = new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(aUri)), dataFileGraph.CreateUriNode(new Uri(provEntityUri)));
if (!dataFileGraph.ContainsTriple(provTriple))
{
dataFileGraph.Assert(provTriple);
dataFileGraph.Assert(new Triple(
dataFileGraph.CreateUriNode(dataFileGraph.BaseUri),
dataFileGraph.CreateUriNode(new Uri("http://www.w3.org/ns/dcat#dataset")),
Tools.CopyNode(currentDataVersionNode, dataFileGraph)
));
dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), dataFileGraph.CreateLiteralNode(
DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
)));
AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, currentDataVersionGraph.BaseUri.AbsoluteUri, triples);
}
// Link the current version to the previous one via the provWasRevisionOfUri predicate.
// NOTE(review): when recentDataVersion is not null the current graph was loaded from that
// very URI, so this only fires if GetGraph returns a graph with a different BaseUri — confirm.
if (recentDataVersion != null && recentDataVersion.AbsoluteUri != currentDataVersionGraph.BaseUri.AbsoluteUri)
{
var recentDataVersionNode = dataFileGraph.CreateUriNode(recentDataVersion);
dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentDataVersionNode));
}
graphs.Add(currentDataVersionGraph);
}
/// <summary>
/// Determines the current metadata version graph (the most recent existing one, or a new
/// empty graph when none exists), adds it to <paramref name="graphs"/> and records its
/// provenance in <paramref name="metadataFileGraph"/>.
/// </summary>
/// <param name="graphs">Collector of graphs to be stored; the version graph is appended.</param>
/// <param name="newMetadataVersionFileGraphName">Name used for the version graph when no metadata version exists yet.</param>
/// <param name="metadataFileGraph">Metadata graph that receives the provenance triples.</param>
/// <param name="existingGraphs">Graph URIs searched for an existing metadata version.</param>
/// <param name="trellisGraph">Trellis graph that receives the registration triples.</param>
/// <param name="fileUri">URI of the file; used as parent when registering the version graph in trellis.</param>
/// <param name="triples">Collector for triples newly added to the trellis graph.</param>
private void SetMetadataVersionGraph(List<IGraph> graphs, string newMetadataVersionFileGraphName, IGraph metadataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples)
{
var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs);
IGraph currentMetadataVersionGraph;
if (recentMetadataVersion == null)
{
// No metadata version known yet: start a fresh graph under the new version name.
currentMetadataVersionGraph = new Graph()
{
BaseUri = new Uri(newMetadataVersionFileGraphName),
};
}
else
{
// Reuse the most recent version graph from the store.
currentMetadataVersionGraph = RdfStoreConnector.GetGraph(recentMetadataVersion);
}
var currentMetadataVersionNode = currentMetadataVersionGraph.CreateUriNode(currentMetadataVersionGraph.BaseUri);
// PROV Info
// The "<version> a prov:Entity" triple doubles as the marker for "provenance already
// recorded": hasMetadata link, generation time and trellis registration are added only once.
var provTriple = new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(aUri)), metadataFileGraph.CreateUriNode(new Uri(provEntityUri)));
if (!metadataFileGraph.ContainsTriple(provTriple))
{
metadataFileGraph.Assert(provTriple);
metadataFileGraph.Assert(new Triple(
metadataFileGraph.CreateUriNode(metadataFileGraph.BaseUri),
metadataFileGraph.CreateUriNode(new Uri("http://purl.org/fdp/fdp-o#hasMetadata")),
Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph)
));
metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), metadataFileGraph.CreateLiteralNode(
DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
)));
AddToTrellis(trellisGraph, rdfSourceUri, fileUri, currentMetadataVersionGraph.BaseUri.AbsoluteUri, triples);
}
// Link the current version to the previous one via the provWasRevisionOfUri predicate.
// NOTE(review): when recentMetadataVersion is not null the current graph was loaded from that
// very URI, so this only fires if GetGraph returns a graph with a different BaseUri — confirm.
if (recentMetadataVersion != null && recentMetadataVersion.AbsoluteUri != currentMetadataVersionGraph.BaseUri.AbsoluteUri)
{
var recentMetadataVersionNode = metadataFileGraph.CreateUriNode(recentMetadataVersion);
metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentMetadataVersionNode));
}
graphs.Add(currentMetadataVersionGraph);
}
/// <summary>
/// Registers a graph in the trellis graph as part of a parent, unless that containment
/// triple is already present. New triples are asserted on the trellis graph and also
/// appended to <paramref name="triples"/>.
/// </summary>
/// <param name="trellisGraph">Trellis graph to extend.</param>
/// <param name="ldpAssignment">URI of the type assigned to the registered graph.</param>
/// <param name="thePartUri">URI of the parent the graph is declared part of.</param>
/// <param name="graphUri">URI of the graph being registered.</param>
/// <param name="triples">Collector for the triples added by this call.</param>
private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples)
{
    var graphNode = trellisGraph.CreateUriNode(new Uri(graphUri));
    var parentNode = trellisGraph.CreateUriNode(new Uri(thePartUri));
    var partOfPredicate = trellisGraph.CreateUriNode(new Uri(partOfUri));
    var containment = new Triple(graphNode, partOfPredicate, parentNode);

    // Already registered: leave type and modification date untouched.
    if (trellisGraph.ContainsTriple(containment))
    {
        return;
    }

    triples.Add(containment);
    trellisGraph.Assert(containment);

    var typePredicate = trellisGraph.CreateUriNode(new Uri(aUri));
    var assignedType = trellisGraph.CreateUriNode(new Uri(ldpAssignment));
    var typeAssignment = new Triple(graphNode, typePredicate, assignedType);
    triples.Add(typeAssignment);
    trellisGraph.Assert(typeAssignment);

    AddModifiedDate(trellisGraph, graphUri, triples);
}
/// <summary>
/// Returns the graph with the given name from the store, or a new empty in-memory graph
/// with that name as base URI when the store does not have it.
/// </summary>
/// <param name="graphUrl">Name (URI) of the graph.</param>
/// <returns>The stored graph if it exists, otherwise a new graph.</returns>
private IGraph CreateOrGetGraph(string graphUrl)
{
    if (RdfStoreConnector.HasGraph(graphUrl))
    {
        return RdfStoreConnector.GetGraph(graphUrl);
    }

    var freshGraph = new Graph();
    freshGraph.BaseUri = new Uri(graphUrl);
    return freshGraph;
}
/// <summary>
/// Lists the URIs of all non-empty named graphs in the store whose name contains
/// <paramref name="id"/>.
/// </summary>
/// <param name="id">Text that must occur in the graph name; passed to the query as a literal.</param>
/// <returns>The matching graph URIs; results whose graph is not a URI node are skipped.</returns>
public IEnumerable<Uri> ListGraphs(string id)
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"SELECT DISTINCT ?g
WHERE { GRAPH ?g { ?s ?p ?o }
FILTER(contains(str(?g), @graph)) }";
    query.SetLiteral("graph", id);

    var rows = RdfStoreConnector.QueryEndpoint.QueryWithResultSet(query.ToString());

    return rows
        .Cast<SparqlResult>()
        .Select(row => row.Value("g"))
        .OfType<UriNode>()
        .Select(graphNode => graphNode.Uri)
        .ToList();
}
/// <summary>
/// Adds a dcterms:modified triple with the current UTC time for <paramref name="root"/>,
/// unless the graph already holds a modification date for that subject. A new triple is
/// asserted on the graph and also appended to <paramref name="triples"/>.
/// </summary>
/// <param name="graph">Graph to check and extend.</param>
/// <param name="root">URI of the subject that receives the modification date.</param>
/// <param name="triples">Collector for the triple added by this call.</param>
private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples)
{
    var modifiedPredicate = graph.CreateUriNode(new Uri(dctermsModifiedUri));
    var subject = graph.CreateUriNode(new Uri(root));

    // Keep an existing modification date as it is.
    if (graph.GetTriplesWithSubjectPredicate(subject, modifiedPredicate).Any())
    {
        return;
    }

    var timestamp = DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture);
    var timestampNode = graph.CreateLiteralNode(timestamp, new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime));
    var modifiedTriple = new Triple(subject, modifiedPredicate, timestampNode);

    triples.Add(modifiedTriple);
    graph.Assert(modifiedTriple);
}
}
\ No newline at end of file
using System.Web;
namespace MetadataExtractorCron;
/// <summary>
/// Helpers for versioned graph URIs whose parameters follow an '@' sign, e.g.
/// <c>.../@type=data&amp;version=1700000000&amp;extracted=true</c>.
/// </summary>
public static class VersionUtil
{
    /// <summary>
    /// Finds the graph URI with the highest <c>version</c> parameter among the URIs that
    /// satisfy the given filters.
    /// </summary>
    /// <param name="graphUris">Candidate graph URIs; enumerated exactly once.</param>
    /// <param name="filter">
    /// Required value of the <c>type</c> parameter, or <c>null</c> to accept any type.
    /// </param>
    /// <param name="notFilterExtracted">
    /// <c>true</c> to accept only URIs without an <c>extracted</c> parameter;
    /// <c>false</c> to accept only URIs that have one.
    /// </param>
    /// <returns>
    /// The matching URI with the highest version greater than zero, or <c>null</c> when no
    /// URI matches. (Previously the first URI of the sequence was returned as a fallback even
    /// if it did not match, which defeated the null checks of callers.)
    /// </returns>
    public static Uri? GetRecentVersion(IEnumerable<Uri> graphUris, string? filter = null, bool notFilterExtracted = true)
    {
        Uri? currentBest = null;
        var currentBestVersion = 0L;
        foreach (var graphUri in graphUris)
        {
            // Graph names use '@' where a regular URI uses '?'; swap it so that the
            // standard query-string parser can read the parameters.
            var queryDictionary = HttpUtility.ParseQueryString(new Uri(graphUri.ToString().Replace("@", "?")).Query);

            // Versions are machine-written numbers: parse them culture-independently.
            // TryParse also rejects a missing (null) version parameter.
            if (!long.TryParse(
                    queryDictionary["version"],
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var version))
            {
                continue;
            }

            if (version <= currentBestVersion)
            {
                continue;
            }

            if (filter is not null && queryDictionary["type"] != filter)
            {
                continue;
            }

            // Exactly one of "is extracted" and "exclude extracted" must hold.
            var isExtracted = queryDictionary["extracted"] is not null;
            if (isExtracted == notFilterExtracted)
            {
                continue;
            }

            currentBestVersion = version;
            currentBest = graphUri;
        }

        return currentBest;
    }

    /// <summary>
    /// Finds the most recent data graph URI that carries an <c>extracted</c> parameter.
    /// </summary>
    /// <param name="graphUris">Candidate graph URIs.</param>
    /// <returns>The matching URI, or <c>null</c> when there is none.</returns>
    public static Uri? GetRecentDataExtractedVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "data", false);
    }

    /// <summary>
    /// Finds the most recent data graph URI without an <c>extracted</c> parameter.
    /// </summary>
    /// <param name="graphUris">Candidate graph URIs.</param>
    /// <returns>The matching URI, or <c>null</c> when there is none.</returns>
    public static Uri? GetRecentDataVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "data");
    }

    /// <summary>
    /// Finds the most recent metadata graph URI without an <c>extracted</c> parameter.
    /// </summary>
    /// <param name="graphUris">Candidate graph URIs.</param>
    /// <returns>The matching URI, or <c>null</c> when there is none.</returns>
    public static Uri? GetRecentMetadataVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "metadata");
    }

    /// <summary>
    /// Creates a new version number: the current UTC time as whole seconds since 1970-01-01.
    /// </summary>
    /// <returns>The Unix timestamp in seconds.</returns>
    public static long GetNewVersion()
    {
        // Computed as a long throughout; the former cast to int would overflow in 2038.
        return DateTimeOffset.UtcNow.ToUnixTimeSeconds();
    }
}
\ No newline at end of file
......@@ -4,7 +4,7 @@
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Version>0.1.4</Version></PropertyGroup>
<Version>0.1.5</Version></PropertyGroup>
<ItemGroup>
......
......@@ -12,7 +12,7 @@
<Description>A library generated from a OpenAPI doc</Description>
<Copyright>No Copyright</Copyright>
<RootNamespace>Org.OpenAPITools</RootNamespace>
<Version>0.1.4</Version>
<Version>0.1.5</Version>
<DocumentationFile>bin\$(Configuration)\$(TargetFramework)\Org.OpenAPITools.xml</DocumentationFile>
<RepositoryUrl>https://github.com/GIT_USER_ID/GIT_REPO_ID.git</RepositoryUrl>
<RepositoryType>git</RepositoryType>
......