Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • Issue/1788-extractionCronjob
  • Issue/1792-newMetadataStructure
  • Issue/2518-docs
  • Issue/2769-migrateCron
  • dev
  • gitkeep
  • main
  • test
  • v0.1.0
  • v0.1.1
  • v0.1.10
  • v0.1.11
  • v0.1.2
  • v0.1.3
  • v0.1.4
  • v0.1.5
  • v0.1.6
  • v0.1.7
  • v0.1.8
  • v0.1.9
20 results

Target

Select target project
  • coscine/backend/scripts/metadataextractorcron
1 result
Select Git revision
  • Issue/1788-extractionCronjob
  • Issue/1792-newMetadataStructure
  • Issue/2518-docs
  • Issue/2769-migrateCron
  • dev
  • gitkeep
  • main
  • test
  • v0.1.0
  • v0.1.1
  • v0.1.10
  • v0.1.11
  • v0.1.2
  • v0.1.3
  • v0.1.4
  • v0.1.5
  • v0.1.6
  • v0.1.7
  • v0.1.8
  • v0.1.9
20 results
Show changes

Commits on Source 4

...@@ -6,12 +6,12 @@ using Coscine.ResourceTypes.Base; ...@@ -6,12 +6,12 @@ using Coscine.ResourceTypes.Base;
using Coscine.ResourceTypes.Base.Models; using Coscine.ResourceTypes.Base.Models;
using Org.OpenAPITools.Api; using Org.OpenAPITools.Api;
using Org.OpenAPITools.Model; using Org.OpenAPITools.Model;
using VDS.RDF.Query;
using VDS.RDF; using VDS.RDF;
using MetadataExtractorCron.Util; using MetadataExtractorCron.Util;
using VDS.RDF.Parsing; using VDS.RDF.Parsing;
using System.Globalization; using System.Globalization;
using System.Security.Cryptography; using System.Security.Cryptography;
using Coscine.Metadata.Util;
namespace MetadataExtractorCron.Extractors; namespace MetadataExtractorCron.Extractors;
...@@ -25,14 +25,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -25,14 +25,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor
private readonly RdfStoreConnector _rdfStoreConnector; private readonly RdfStoreConnector _rdfStoreConnector;
private readonly MetadataGraphsCreator _metadataGraphsCreator; private readonly MetadataGraphsCreator _metadataGraphsCreator;
private const string metadataExtractionVersionUrl = "https://purl.org/coscine/terms/metatadataextraction#version";
private const string dcatdistributionUrl = "http://www.w3.org/ns/dcat#distribution";
private const string partOfUri = "http://purl.org/dc/terms/isPartOf";
private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified";
private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource";
private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged";
public CoscineMetadataExtractor() public CoscineMetadataExtractor()
{ {
_configuration = new ConsulConfiguration(); _configuration = new ConsulConfiguration();
...@@ -72,7 +64,7 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -72,7 +64,7 @@ public class CoscineMetadataExtractor : IMetadataExtractor
foreach (var file in fileInfos.Where((fileInfo) => fileInfo.HasBody)) foreach (var file in fileInfos.Where((fileInfo) => fileInfo.HasBody))
{ {
if (file.BodyBytes > 16 * 1000 * 1000) if (file.BodyBytes > VersionUtil.DetectionByteLimit)
{ {
Console.WriteLine($"Skipping {file.Key} on {resourceId} since it has a too large byte size"); Console.WriteLine($"Skipping {file.Key} on {resourceId} since it has a too large byte size");
continue; continue;
...@@ -114,63 +106,31 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -114,63 +106,31 @@ public class CoscineMetadataExtractor : IMetadataExtractor
private void CreateMetadataSetsIfDontExist(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos) private void CreateMetadataSetsIfDontExist(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos)
{ {
var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}"; var existingGraphs = _rdfStoreConnector.GetMetadataIds(resourceId, entry.Key);
var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
if (!newFileGraphName.EndsWith("/"))
{
newFileGraphName += "/";
}
var existingGraphs = ListGraphs(newFileGraphName);
if (!existingGraphs.Any()) if (!existingGraphs.Any())
{ {
Console.WriteLine($"Creating graphs for {newFileGraphName} since they did not exist before!"); Console.WriteLine($"Creating graphs for {resourceId}, {entry.Key} since they did not exist before!");
_metadataGraphsCreator.CreateGraphs(resourceId, entry, fileInfos); GraphStorer.StoreGraphs(_metadataGraphsCreator.CreateGraphs(
} resourceId,
} entry.Key,
true,
private IEnumerable<Uri> ListGraphs(string id) true
{ ), _rdfStoreConnector);
var cmdString = new SparqlParameterizedString
{
CommandText = @"SELECT DISTINCT ?g
WHERE { GRAPH ?g { ?s ?p ?o }
FILTER(contains(str(?g), @graph)) }"
};
cmdString.SetLiteral("graph", id);
var resultSet = _rdfStoreConnector.QueryEndpoint.QueryWithResultSet(cmdString.ToString());
var graphs = new List<Uri>();
foreach (SparqlResult r in resultSet)
{
var uriNode = r.Value("g") as UriNode;
if (uriNode is not null)
{
graphs.Add(uriNode.Uri);
}
} }
return graphs;
} }
private bool HasCurrentMetadataExtracted(string resourceId, ResourceEntry entry) private bool HasCurrentMetadataExtracted(string resourceId, ResourceEntry entry)
{ {
var resourceGraphName = $"{_resourceUrlPrefix}/{resourceId}"; var existingGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key);
var newFileGraphName = $"{resourceGraphName}/{entry.Key}"; var existingExtractedGraphs = _rdfStoreConnector.GetDataIds(resourceId, entry.Key, true);
if (!newFileGraphName.EndsWith("/"))
{
newFileGraphName += "/";
}
var existingGraphs = ListGraphs(newFileGraphName);
var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs); var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);
var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingGraphs); var recentDataExtractedVersion = VersionUtil.GetRecentDataExtractedVersion(existingExtractedGraphs);
return return
recentDataExtractedVersion != null recentDataExtractedVersion != null
&& recentDataVersion != null && recentDataVersion != null
&& recentDataExtractedVersion.AbsoluteUri.Contains(recentDataVersion.AbsoluteUri) && recentDataExtractedVersion.Contains(recentDataVersion)
&& recentDataExtractedVersion.AbsoluteUri != recentDataVersion.AbsoluteUri; && recentDataExtractedVersion != recentDataVersion;
} }
private async Task<MetadataOutput> ExtractMetadata(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions) private async Task<MetadataOutput> ExtractMetadata(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions)
...@@ -195,9 +155,9 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -195,9 +155,9 @@ public class CoscineMetadataExtractor : IMetadataExtractor
var extractedOutputs = await _apiClient.PostMetadataExtractorWorkerAsync( var extractedOutputs = await _apiClient.PostMetadataExtractorWorkerAsync(
givenStream, givenStream,
$"{resourceId}/{entry.Key.Replace("\\", "/")}", $"{resourceId}/{entry.Key.Replace("\\", "/")}",
null, null!,
entry.Created?.ToString("o", CultureInfo.InvariantCulture), entry.Created?.ToString("o", CultureInfo.InvariantCulture)!,
entry.Modified?.ToString("o", CultureInfo.InvariantCulture) entry.Modified?.ToString("o", CultureInfo.InvariantCulture)!
); );
return extractedOutputs[0]; return extractedOutputs[0];
...@@ -221,9 +181,8 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -221,9 +181,8 @@ public class CoscineMetadataExtractor : IMetadataExtractor
newFileGraphNameAddon += "/"; newFileGraphNameAddon += "/";
} }
var existingGraphs = ListGraphs(newFileGraphNameAddon); var recentDataVersion = _rdfStoreConnector.GetDataId(resourceId, entry.Key);
var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs); var recentMetadataVersion = _rdfStoreConnector.GetMetadataId(resourceId, entry.Key);
var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs);
await CreateHashData(resourceId, entry, resourceTypeDefinition, resourceTypeOptions, newFileGraphNameAddon, recentDataVersion); await CreateHashData(resourceId, entry, resourceTypeDefinition, resourceTypeOptions, newFileGraphNameAddon, recentDataVersion);
...@@ -232,14 +191,14 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -232,14 +191,14 @@ public class CoscineMetadataExtractor : IMetadataExtractor
throw new NullReferenceException("The recent data version is null and can't be used."); throw new NullReferenceException("The recent data version is null and can't be used.");
} }
var recentDataExtractedVersion = new Uri(recentDataVersion.AbsoluteUri + "&extracted=true"); var recentDataExtractedVersion = new Uri(recentDataVersion + "&extracted=true");
if (recentMetadataVersion is null) if (recentMetadataVersion is null)
{ {
throw new NullReferenceException("The recent metadata version is null and can't be used."); throw new NullReferenceException("The recent metadata version is null and can't be used.");
} }
var recentMetadataExtractedVersion = new Uri(recentMetadataVersion.AbsoluteUri + "&extracted=true"); var recentMetadataExtractedVersion = new Uri(recentMetadataVersion + "&extracted=true");
var tripleStore = new TripleStore(); var tripleStore = new TripleStore();
tripleStore.LoadFromString(extractedMetadata.Metadata, new TriGParser(TriGSyntax.Recommendation)); tripleStore.LoadFromString(extractedMetadata.Metadata, new TriGParser(TriGSyntax.Recommendation));
...@@ -248,57 +207,22 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -248,57 +207,22 @@ public class CoscineMetadataExtractor : IMetadataExtractor
GraphStorer.StoreGraphs(tripleStore.Graphs, _rdfStoreConnector); GraphStorer.StoreGraphs(tripleStore.Graphs, _rdfStoreConnector);
var trellisGraph = _rdfStoreConnector.GetGraph(trellisGraphUri); GraphStorer.StoreGraphs(
var triples = new List<Triple>(); _metadataGraphsCreator.UpdateExtractionGraphs(
resourceId,
AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentDataExtractedVersion.AbsoluteUri, triples); entry.Key,
AddToTrellis(trellisGraph, rdfSourceUri, newFileGraphName, recentMetadataExtractedVersion.AbsoluteUri, triples); recentDataVersion,
GraphStorer.AddToGraph(trellisGraph, triples, _rdfStoreConnector); recentMetadataVersion,
metadataExtractorVersion
var newDataFileGraphName = $"{newFileGraphName}/@type=data"; ),
var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata"; _rdfStoreConnector);
}
var dataGraph = CreateOrGetGraph(newDataFileGraphName);
var metadataGraph = CreateOrGetGraph(newMetadataFileGraphName); private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, string? recentDataVersion)
dataGraph.Assert(new Triple(
dataGraph.CreateUriNode(new Uri(newDataFileGraphName)),
dataGraph.CreateUriNode(new Uri(dcatdistributionUrl)),
dataGraph.CreateUriNode(recentDataExtractedVersion)
));
dataGraph.Assert(new Triple(
dataGraph.CreateUriNode(recentDataExtractedVersion),
dataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)),
dataGraph.CreateLiteralNode(metadataExtractorVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(new Uri(newMetadataFileGraphName)),
metadataGraph.CreateUriNode(new Uri(dcatdistributionUrl)),
metadataGraph.CreateUriNode(recentMetadataExtractedVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(recentMetadataExtractedVersion),
metadataGraph.CreateUriNode(new Uri(metadataExtractionVersionUrl)),
metadataGraph.CreateLiteralNode(metadataExtractorVersion)
));
metadataGraph.Assert(new Triple(
metadataGraph.CreateUriNode(recentMetadataVersion),
metadataGraph.CreateUriNode(new Uri("http://purl.org/fdp/fdp-o#isMetadataOf")),
metadataGraph.CreateUriNode(recentDataVersion)
));
var provenanceGraphs = new List<IGraph> { dataGraph, metadataGraph };
GraphStorer.StoreGraphs(provenanceGraphs, _rdfStoreConnector);
}
private async Task CreateHashData(string resourceId, ResourceEntry entry, BaseResourceType resourceTypeDefinition, Dictionary<string, string>? resourceTypeOptions, string newFileGraphNameAddon, Uri? recentDataVersion)
{ {
var dataGraphName = $"{newFileGraphNameAddon}@type=data"; var dataGraphName = $"{newFileGraphNameAddon}@type=data";
var dataGraph = CreateOrGetGraph(dataGraphName); var dataGraph = CreateOrGetGraph(dataGraphName);
var hashTriples = new List<Triple>();
var loadedEntry = await resourceTypeDefinition.LoadEntry(resourceId, entry.Key, resourceTypeOptions); var loadedEntry = await resourceTypeDefinition.LoadEntry(resourceId, entry.Key, resourceTypeOptions);
if (loadedEntry is null) if (loadedEntry is null)
...@@ -306,25 +230,16 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -306,25 +230,16 @@ public class CoscineMetadataExtractor : IMetadataExtractor
throw new NullReferenceException("The resulting stream of the loaded entry is null, when trying to hash the data."); throw new NullReferenceException("The resulting stream of the loaded entry is null, when trying to hash the data.");
} }
var sha512Hash = Convert.ToBase64String(HashUtil.HashData(loadedEntry, HashAlgorithmName.SHA512)); var defaultHash = Convert.ToBase64String(HashUtil.HashData(loadedEntry));
var dataGraphId = recentDataVersion;
var hashGraphId = new Uri($"{dataGraphId?.AbsoluteUri}&hash={Guid.NewGuid()}");
var dataGraphSubject = dataGraph.CreateUriNode(dataGraphId);
var hashSubject = dataGraph.CreateUriNode(hashGraphId);
hashTriples.Add(new Triple(dataGraphSubject, if (recentDataVersion is null)
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashType")), {
hashSubject)); return;
hashTriples.Add(new Triple(hashSubject, }
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashFunction")),
dataGraph.CreateLiteralNode("SHA512")));
hashTriples.Add(new Triple(hashSubject,
dataGraph.CreateUriNode(new Uri("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hashValue")),
dataGraph.CreateLiteralNode(sha512Hash, new Uri("http://www.w3.org/2001/XMLSchema#hexBinary"))));
GraphStorer.AddToGraph(dataGraph, hashTriples, _rdfStoreConnector); GraphStorer.AddToGraph(dataGraph, HashUtil.CreateHashTriples(
dataGraph, new Uri(recentDataVersion), defaultHash
), _rdfStoreConnector);
} }
private static void FormatResultMetadata(TripleStore tripleStore, Uri dataExtractGraph, Uri metadataExtractGraph) private static void FormatResultMetadata(TripleStore tripleStore, Uri dataExtractGraph, Uri metadataExtractGraph)
...@@ -347,30 +262,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -347,30 +262,6 @@ public class CoscineMetadataExtractor : IMetadataExtractor
} }
} }
private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples)
{
var setGraphNode = trellisGraph.CreateUriNode(new Uri(graphUri));
var setThePartNode = trellisGraph.CreateUriNode(new Uri(thePartUri));
var triple = new Triple(
setGraphNode,
trellisGraph.CreateUriNode(new Uri(partOfUri)),
setThePartNode
);
if (!trellisGraph.ContainsTriple(triple))
{
triples.Add(triple);
trellisGraph.Assert(triple);
var assignmentTriple = new Triple(
setGraphNode,
trellisGraph.CreateUriNode(new Uri(aUri)),
trellisGraph.CreateUriNode(new Uri(ldpAssignment))
);
triples.Add(assignmentTriple);
trellisGraph.Assert(assignmentTriple);
AddModifiedDate(trellisGraph, graphUri, triples);
}
}
private IGraph CreateOrGetGraph(string graphUrl) private IGraph CreateOrGetGraph(string graphUrl)
{ {
var entryAlreadyExists = _rdfStoreConnector.HasGraph(graphUrl); var entryAlreadyExists = _rdfStoreConnector.HasGraph(graphUrl);
...@@ -382,22 +273,4 @@ public class CoscineMetadataExtractor : IMetadataExtractor ...@@ -382,22 +273,4 @@ public class CoscineMetadataExtractor : IMetadataExtractor
}; };
} }
private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples)
{
var dcTermsModifiedNode = graph.CreateUriNode(new Uri(dctermsModifiedUri));
var rootNode = graph.CreateUriNode(new Uri(root));
if (!graph.GetTriplesWithSubjectPredicate(rootNode, dcTermsModifiedNode).Any())
{
var triple = new Triple(
rootNode,
dcTermsModifiedNode,
graph.CreateLiteralNode(
DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
)
);
triples.Add(triple);
graph.Assert(triple);
}
}
} }
\ No newline at end of file
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<Version>0.1.4</Version></PropertyGroup> <Version>0.1.5</Version></PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Coscine.Database" Version="2.*-*" /> <PackageReference Include="Coscine.Database" Version="2.*-*" />
......
...@@ -9,32 +9,14 @@ public static class GraphStorer ...@@ -9,32 +9,14 @@ public static class GraphStorer
{ {
foreach (var graphUri in graphUris) foreach (var graphUri in graphUris)
{ {
Console.WriteLine($" ({graphUri.BaseUri})"); rdfStoreConnector.AddGraph(graphUri);
if (rdfStoreConnector.HasGraph(graphUri.BaseUri))
{
Console.WriteLine($" - Graph {graphUri.BaseUri} exists");
// Clear the existing graph from the store
rdfStoreConnector.ClearGraph(graphUri.BaseUri);
Console.WriteLine($" - Cleared Graph {graphUri.BaseUri}");
}
// Chunking since the size otherwise can be too large
foreach (var triples in graphUri.Triples.Chunk(100))
{
rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graphUri.BaseUri, triples, Enumerable.Empty<Triple>());
}
Console.WriteLine($" - Graph {graphUri.BaseUri} added successfully");
Console.WriteLine();
} }
} }
public static void AddToGraph(IGraph graph, IEnumerable<Triple> triples, RdfStoreConnector rdfStoreConnector) public static void AddToGraph(IGraph graph, IEnumerable<Triple> triples, RdfStoreConnector rdfStoreConnector)
{ {
Console.WriteLine($" - Adding Triples to {graph.BaseUri}"); Console.WriteLine($" - Adding Triples to {graph.BaseUri}");
rdfStoreConnector.ReadWriteSparqlConnector.UpdateGraph(graph.BaseUri, triples, Enumerable.Empty<Triple>()); rdfStoreConnector.AddToGraph(graph, triples);
Console.WriteLine($" - Triples added to Graph {graph.BaseUri} successfully"); Console.WriteLine($" - Triples added to Graph {graph.BaseUri} successfully");
Console.WriteLine(); Console.WriteLine();
} }
......
using System.Security.Cryptography;
namespace MetadataExtractorCron.Util;
/// <summary>
/// Helper for hashing the content of a stream with a selectable hash algorithm.
/// </summary>
public static class HashUtil
{
    /// <summary>
    /// Creates the hash implementation that belongs to the given algorithm name.
    /// </summary>
    /// <param name="hashAlgorithmName">One of MD5, SHA1, SHA256, SHA384 or SHA512.</param>
    /// <returns>A new hash algorithm instance; the caller is responsible for disposing it.</returns>
    /// <exception cref="CryptographicException">The name is none of the supported algorithms.</exception>
    private static HashAlgorithm GetHashAlgorithm(HashAlgorithmName hashAlgorithmName)
    {
        // HashAlgorithmName compares by its Name (ordinal), so dispatching on Name
        // selects exactly the same algorithm as comparing against the well-known values.
        switch (hashAlgorithmName.Name)
        {
            case "MD5":
                return MD5.Create();
            case "SHA1":
                return SHA1.Create();
            case "SHA256":
                return SHA256.Create();
            case "SHA384":
                return SHA384.Create();
            case "SHA512":
                return SHA512.Create();
            default:
                throw new CryptographicException($"Unknown hash algorithm \"{hashAlgorithmName.Name}\".");
        }
    }

    /// <summary>
    /// Computes the hash of everything that can be read from <paramref name="data"/>.
    /// </summary>
    /// <param name="data">Stream whose content is hashed.</param>
    /// <param name="hashAlgorithm">Name of the hash algorithm to apply.</param>
    /// <returns>The raw hash bytes.</returns>
    /// <exception cref="CryptographicException">The algorithm name is not supported.</exception>
    public static byte[] HashData(Stream data,
        HashAlgorithmName hashAlgorithm)
    {
        using (var hasher = GetHashAlgorithm(hashAlgorithm))
        {
            return hasher.ComputeHash(data);
        }
    }
}
\ No newline at end of file
using Coscine.Metadata;
using Coscine.ResourceTypes.Base.Models;
using System.Globalization;
using VDS.RDF;
using VDS.RDF.Parsing;
using VDS.RDF.Query;
namespace MetadataExtractorCron.Util;
/// <summary>
/// Creates the graph structure (file graph, data/metadata graphs and their version graphs)
/// for a single resource entry and stores it through <see cref="GraphStorer"/>.
/// Derived from MetadataMigrator.
/// </summary>
public class MetadataGraphsCreator
{
    private const string partOfUri = "http://purl.org/dc/terms/isPartOf";
    private const string aUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
    private const string basicContainerUri = "http://www.w3.org/ns/ldp#BasicContainer";
    private const string nonRdfSourceUri = "http://www.w3.org/ns/ldp#NonRDFSource";
    private const string rdfSourceUri = "http://www.w3.org/ns/ldp#RDFSource";
    private const string dcatcatalogUri = "http://www.w3.org/ns/dcat#catalog";
    private const string dcatCatalogClassUri = "http://www.w3.org/ns/dcat#Catalog";
    private const string dctermsIdentifierUri = "http://purl.org/dc/terms/identifier";
    private const string dctermsModifiedUri = "http://purl.org/dc/terms/modified";
    private const string fdpMetadataServiceUri = "http://purl.org/fdp/fdp-o#MetadataService";
    private const string fdphasMetadataUri = "http://purl.org/fdp/fdp-o#hasMetadata";
    private const string provEntityUri = "http://www.w3.org/ns/prov#Entity";
    private const string provGeneratedAtTimeUri = "http://www.w3.org/ns/prov#generatedAtTime";
    // PROV-O defines prov:wasRevisionOf; the former value "wasRevisionOfNode" is not a PROV-O term.
    private const string provWasRevisionOfUri = "http://www.w3.org/ns/prov#wasRevisionOf";
    private const string ldpDescribedByUri = "http://www.w3.org/ns/ldp#describedBy";
    private const string resourceUrlPrefix = "https://purl.org/coscine/resources";
    private const string trellisGraphUri = "http://www.trellisldp.org/ns/trellis#PreferServerManaged";

    /// <summary>Connector used for every read from and write to the RDF store.</summary>
    private RdfStoreConnector RdfStoreConnector { get; }

    /// <summary>
    /// Creates a new instance working against the given store connector.
    /// </summary>
    /// <param name="rdfStoreConnector">Connector to the RDF store.</param>
    public MetadataGraphsCreator(RdfStoreConnector rdfStoreConnector)
    {
        RdfStoreConnector = rdfStoreConnector;
    }

    /// <summary>
    /// Builds the file graph, the data and metadata graphs and their version graphs for
    /// <paramref name="entry"/>, stores them and appends the collected triples to the trellis graph.
    /// </summary>
    /// <param name="resourceId">Id of the resource the entry belongs to.</param>
    /// <param name="entry">Entry for which the graphs are created.</param>
    /// <param name="fileInfos">All entries of the resource; used to link entries located below <paramref name="entry"/>.</param>
    public void CreateGraphs(string resourceId, ResourceEntry entry, IEnumerable<ResourceEntry> fileInfos)
    {
        var trellisGraph = RdfStoreConnector.GetGraph(trellisGraphUri);
        var graphs = new List<IGraph>();
        var triples = new List<Triple>();
        var resourceGraphName = $"{resourceUrlPrefix}/{resourceId}";

        // Graph URIs of all known entries, each normalized to end with a slash.
        // The lambda parameter is deliberately not called "entry" to avoid shadowing the method parameter.
        var fileGraphs = fileInfos.Select((fileInfo) =>
        {
            var entryGraphName = $"{resourceGraphName}/{fileInfo.Key}";
            if (!entryGraphName.EndsWith("/"))
            {
                entryGraphName += "/";
            }
            return new Uri(entryGraphName);
        });

        var newFileGraphName = $"{resourceGraphName}/{entry.Key}";
        Console.WriteLine($"Migrating {newFileGraphName}");

        var version = VersionUtil.GetNewVersion();
        var newMetadataFileGraphName = $"{newFileGraphName}/@type=metadata";
        var newDataFileGraphName = $"{newFileGraphName}/@type=data";
        var newMetadataVersionFileGraphName = $"{newFileGraphName}/@type=metadata&version={version}";
        var newDataVersionFileGraphName = $"{newFileGraphName}/@type=data&version={version}";

        var newFileGraph = CreateOrGetGraph(newFileGraphName);
        var fileNode = newFileGraph.CreateUriNode(new Uri(newFileGraphName));
        graphs.Add(newFileGraph);

        // Set relation to resource, if a plain file in no folder
        if (!entry.Key.Contains('/'))
        {
            AddToTrellis(trellisGraph, basicContainerUri, resourceGraphName, newFileGraphName, triples);
        }

        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri))));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(aUri)), newFileGraph.CreateUriNode(new Uri(fdpMetadataServiceUri))));

        AddFilesToAFolder(trellisGraph, fileGraphs, new Uri(newFileGraphName), newFileGraph, newFileGraphName, triples);

        var metadataFileGraph = SetMetadataGraph(trellisGraph, graphs, newMetadataFileGraphName, newFileGraph, newFileGraphName, triples);
        var dataFileGraph = SetDataGraph(trellisGraph, graphs, newDataFileGraphName, newFileGraph, newFileGraphName, metadataFileGraph.BaseUri.AbsoluteUri, triples);

        var existingGraphs = ListGraphs(newFileGraphName + "/");
        SetDataVersionGraph(graphs, newDataVersionFileGraphName, dataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples);
        SetMetadataVersionGraph(graphs, newMetadataVersionFileGraphName, metadataFileGraph, existingGraphs, trellisGraph, newFileGraphName, triples);

        GraphStorer.StoreGraphs(graphs, RdfStoreConnector);
        GraphStorer.AddToGraph(trellisGraph, triples, RdfStoreConnector);
    }

    /// <summary>
    /// Links every graph in <paramref name="fileGraphs"/> that lies below <paramref name="fileGraph"/>
    /// (and is not itself a typed data/metadata graph) to the file graph and to the trellis graph.
    /// </summary>
    private static void AddFilesToAFolder(IGraph trellisGraph, IEnumerable<Uri> fileGraphs, Uri fileGraph, IGraph newFileGraph, string fileUri, ICollection<Triple> triples)
    {
        // Add all files to a folder
        foreach (var otherFileGraph in fileGraphs)
        {
            // TODO: Deal with multiple levels of files
            if (otherFileGraph.AbsoluteUri != fileGraph.AbsoluteUri
                && otherFileGraph.AbsoluteUri.Contains(fileGraph.AbsoluteUri + "/")
                && !otherFileGraph.AbsoluteUri.Contains("&data")
                && !otherFileGraph.AbsoluteUri.Contains("?type=")
                && !otherFileGraph.AbsoluteUri.Contains("&type=")
                && !otherFileGraph.AbsoluteUri.Contains("@type="))
            {
                var otherFileNode = newFileGraph.CreateUriNode(otherFileGraph);
                newFileGraph.Assert(new Triple(newFileGraph.CreateUriNode(new Uri(fileUri)), newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), otherFileNode));
                AddToTrellis(trellisGraph, basicContainerUri, fileUri, otherFileGraph.AbsoluteUri, triples);
            }
        }
    }

    /// <summary>
    /// Creates or loads the metadata graph of a file, registers it in <paramref name="graphs"/> and the
    /// trellis graph, and links it from the file graph.
    /// </summary>
    /// <returns>The metadata graph.</returns>
    private IGraph SetMetadataGraph(IGraph trellisGraph, List<IGraph> graphs, string newMetadataFileGraphName, IGraph newFileGraph, string fileUri, ICollection<Triple> triples)
    {
        var metadataFileNode = newFileGraph.CreateUriNode(new Uri(newMetadataFileGraphName));
        var metadataFileGraph = CreateOrGetGraph(newMetadataFileGraphName);
        graphs.Add(metadataFileGraph);

        AddToTrellis(trellisGraph, rdfSourceUri, fileUri, newMetadataFileGraphName, triples);

        var fileNode = newFileGraph.CreateUriNode(new Uri(fileUri));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), metadataFileNode));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(fdphasMetadataUri)), metadataFileNode));

        metadataFileGraph.Assert(new Triple(
            Tools.CopyNode(metadataFileNode, metadataFileGraph),
            metadataFileGraph.CreateUriNode(new Uri(aUri)),
            metadataFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri))
        ));
        return metadataFileGraph;
    }

    /// <summary>
    /// Creates or loads the data graph of a file, registers it in <paramref name="graphs"/> and the
    /// trellis graph, links it from the file graph and points it to its describing metadata graph.
    /// </summary>
    /// <returns>The data graph.</returns>
    private IGraph SetDataGraph(IGraph trellisGraph, List<IGraph> graphs, string newDataFileGraphName, IGraph newFileGraph, string fileUri, string metadataFileUri, ICollection<Triple> triples)
    {
        var dataFileNode = newFileGraph.CreateUriNode(new Uri(newDataFileGraphName));
        var dataFileGraph = CreateOrGetGraph(newDataFileGraphName);
        graphs.Add(dataFileGraph);

        AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, newDataFileGraphName, triples);

        var fileNode = newFileGraph.CreateUriNode(new Uri(fileUri));
        newFileGraph.Assert(new Triple(fileNode, newFileGraph.CreateUriNode(new Uri(dcatcatalogUri)), dataFileNode));

        dataFileGraph.Assert(new Triple(Tools.CopyNode(dataFileNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(aUri)), dataFileGraph.CreateUriNode(new Uri(dcatCatalogClassUri))));
        dataFileGraph.Assert(new Triple(Tools.CopyNode(dataFileNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(ldpDescribedByUri)), dataFileGraph.CreateUriNode(new Uri(metadataFileUri))));
        return dataFileGraph;
    }

    /// <summary>
    /// Adds the current data version graph (the most recent existing one, or a new one named
    /// <paramref name="newDataVersionFileGraphName"/>) to <paramref name="graphs"/> and records
    /// its provenance in <paramref name="dataFileGraph"/>.
    /// </summary>
    private void SetDataVersionGraph(List<IGraph> graphs, string newDataVersionFileGraphName, IGraph dataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples)
    {
        var recentDataVersion = VersionUtil.GetRecentDataVersion(existingGraphs);

        IGraph currentDataVersionGraph;
        if (recentDataVersion is null)
        {
            currentDataVersionGraph = new Graph()
            {
                BaseUri = new Uri(newDataVersionFileGraphName),
            };
        }
        else
        {
            currentDataVersionGraph = RdfStoreConnector.GetGraph(recentDataVersion);
        }

        var currentDataVersionNode = currentDataVersionGraph.CreateUriNode(currentDataVersionGraph.BaseUri);
        currentDataVersionGraph.Assert(new Triple(currentDataVersionNode, currentDataVersionGraph.CreateUriNode(new Uri(dctermsIdentifierUri)), currentDataVersionGraph.CreateLiteralNode(
            currentDataVersionGraph.BaseUri.AbsoluteUri,
            new Uri(XmlSpecsHelper.XmlSchemaDataTypeString)
        )));

        // PROV Info
        var provTriple = new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(aUri)), dataFileGraph.CreateUriNode(new Uri(provEntityUri)));
        if (!dataFileGraph.ContainsTriple(provTriple))
        {
            dataFileGraph.Assert(provTriple);
            dataFileGraph.Assert(new Triple(
                dataFileGraph.CreateUriNode(dataFileGraph.BaseUri),
                dataFileGraph.CreateUriNode(new Uri("http://www.w3.org/ns/dcat#dataset")),
                Tools.CopyNode(currentDataVersionNode, dataFileGraph)
            ));
            dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), dataFileGraph.CreateLiteralNode(
                DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
                new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
            )));
            AddToTrellis(trellisGraph, nonRdfSourceUri, fileUri, currentDataVersionGraph.BaseUri.AbsoluteUri, triples);
        }

        // NOTE(review): when a recent version exists, the current graph was loaded from that same URI,
        // so this only fires if GetGraph yields a different BaseUri — confirm whether that can happen.
        if (recentDataVersion is not null && recentDataVersion.AbsoluteUri != currentDataVersionGraph.BaseUri.AbsoluteUri)
        {
            var recentDataVersionNode = dataFileGraph.CreateUriNode(recentDataVersion);
            dataFileGraph.Assert(new Triple(Tools.CopyNode(currentDataVersionNode, dataFileGraph), dataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentDataVersionNode));
        }
        graphs.Add(currentDataVersionGraph);
    }

    /// <summary>
    /// Adds the current metadata version graph (the most recent existing one, or a new one named
    /// <paramref name="newMetadataVersionFileGraphName"/>) to <paramref name="graphs"/> and records
    /// its provenance in <paramref name="metadataFileGraph"/>.
    /// </summary>
    private void SetMetadataVersionGraph(List<IGraph> graphs, string newMetadataVersionFileGraphName, IGraph metadataFileGraph, IEnumerable<Uri> existingGraphs, IGraph trellisGraph, string fileUri, ICollection<Triple> triples)
    {
        var recentMetadataVersion = VersionUtil.GetRecentMetadataVersion(existingGraphs);

        IGraph currentMetadataVersionGraph;
        if (recentMetadataVersion is null)
        {
            currentMetadataVersionGraph = new Graph()
            {
                BaseUri = new Uri(newMetadataVersionFileGraphName),
            };
        }
        else
        {
            currentMetadataVersionGraph = RdfStoreConnector.GetGraph(recentMetadataVersion);
        }

        var currentMetadataVersionNode = currentMetadataVersionGraph.CreateUriNode(currentMetadataVersionGraph.BaseUri);

        // PROV Info
        var provTriple = new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(aUri)), metadataFileGraph.CreateUriNode(new Uri(provEntityUri)));
        if (!metadataFileGraph.ContainsTriple(provTriple))
        {
            metadataFileGraph.Assert(provTriple);
            metadataFileGraph.Assert(new Triple(
                metadataFileGraph.CreateUriNode(metadataFileGraph.BaseUri),
                metadataFileGraph.CreateUriNode(new Uri(fdphasMetadataUri)),
                Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph)
            ));
            metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provGeneratedAtTimeUri)), metadataFileGraph.CreateLiteralNode(
                DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
                new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
            )));
            AddToTrellis(trellisGraph, rdfSourceUri, fileUri, currentMetadataVersionGraph.BaseUri.AbsoluteUri, triples);
        }

        // NOTE(review): same observation as for the data version graph — confirm this branch is reachable.
        if (recentMetadataVersion is not null && recentMetadataVersion.AbsoluteUri != currentMetadataVersionGraph.BaseUri.AbsoluteUri)
        {
            var recentMetadataVersionNode = metadataFileGraph.CreateUriNode(recentMetadataVersion);
            metadataFileGraph.Assert(new Triple(Tools.CopyNode(currentMetadataVersionNode, metadataFileGraph), metadataFileGraph.CreateUriNode(new Uri(provWasRevisionOfUri)), recentMetadataVersionNode));
        }
        graphs.Add(currentMetadataVersionGraph);
    }

    /// <summary>
    /// Registers <paramref name="graphUri"/> as part of <paramref name="thePartUri"/> in the trellis graph,
    /// together with its LDP type and a modified date, unless the part-of triple already exists.
    /// Every newly asserted triple is also appended to <paramref name="triples"/>.
    /// </summary>
    private static void AddToTrellis(IGraph trellisGraph, string ldpAssignment, string thePartUri, string graphUri, ICollection<Triple> triples)
    {
        var setGraphNode = trellisGraph.CreateUriNode(new Uri(graphUri));
        var setThePartNode = trellisGraph.CreateUriNode(new Uri(thePartUri));
        var triple = new Triple(
            setGraphNode,
            trellisGraph.CreateUriNode(new Uri(partOfUri)),
            setThePartNode
        );
        if (!trellisGraph.ContainsTriple(triple))
        {
            triples.Add(triple);
            trellisGraph.Assert(triple);

            var assignmentTriple = new Triple(
                setGraphNode,
                trellisGraph.CreateUriNode(new Uri(aUri)),
                trellisGraph.CreateUriNode(new Uri(ldpAssignment))
            );
            triples.Add(assignmentTriple);
            trellisGraph.Assert(assignmentTriple);

            AddModifiedDate(trellisGraph, graphUri, triples);
        }
    }

    /// <summary>
    /// Loads the graph with the given URL from the store, or creates a new empty graph with that
    /// base URI when the store does not have it.
    /// </summary>
    private IGraph CreateOrGetGraph(string graphUrl)
    {
        var entryAlreadyExists = RdfStoreConnector.HasGraph(graphUrl);
        return entryAlreadyExists
            ? RdfStoreConnector.GetGraph(graphUrl)
            : new Graph()
            {
                BaseUri = new Uri(graphUrl)
            };
    }

    /// <summary>
    /// Lists the URIs of all non-empty graphs in the store whose URI contains <paramref name="id"/>.
    /// </summary>
    /// <param name="id">Substring the graph URI has to contain; passed to the query as a literal.</param>
    /// <returns>The matching graph URIs.</returns>
    public IEnumerable<Uri> ListGraphs(string id)
    {
        var cmdString = new SparqlParameterizedString
        {
            CommandText = @"SELECT DISTINCT ?g
WHERE { GRAPH ?g { ?s ?p ?o }
FILTER(contains(str(?g), @graph)) }"
        };
        cmdString.SetLiteral("graph", id);

        var resultSet = RdfStoreConnector.QueryEndpoint.QueryWithResultSet(cmdString.ToString());
        var graphs = new List<Uri>();
        foreach (SparqlResult r in resultSet)
        {
            // Results that are not URI nodes are skipped.
            if (r.Value("g") is UriNode uriNode)
            {
                graphs.Add(uriNode.Uri);
            }
        }
        return graphs;
    }

    /// <summary>
    /// Adds a dcterms:modified triple with the current UTC time for <paramref name="root"/>,
    /// unless the graph already holds a modified date for it.
    /// </summary>
    private static void AddModifiedDate(IGraph graph, string root, ICollection<Triple> triples)
    {
        var dcTermsModifiedNode = graph.CreateUriNode(new Uri(dctermsModifiedUri));
        var rootNode = graph.CreateUriNode(new Uri(root));
        if (!graph.GetTriplesWithSubjectPredicate(rootNode, dcTermsModifiedNode).Any())
        {
            var triple = new Triple(
                rootNode,
                dcTermsModifiedNode,
                graph.CreateLiteralNode(
                    DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture),
                    new Uri(XmlSpecsHelper.XmlSchemaDataTypeDateTime)
                )
            );
            triples.Add(triple);
            graph.Assert(triple);
        }
    }
}
\ No newline at end of file
using System.Web;
namespace MetadataExtractorCron;
/// <summary>
/// Helpers for picking the most recent versioned graph out of a list of graph URIs and for
/// generating new version numbers. Graph URIs carry their parameters after an "@",
/// e.g. <c>.../@type=data&amp;version=123&amp;extracted=true</c>.
/// </summary>
public static class VersionUtil
{
    /// <summary>
    /// Returns the graph URI with the highest numeric <c>version</c> parameter that matches the filters.
    /// </summary>
    /// <param name="graphUris">Candidate graph URIs.</param>
    /// <param name="filter">Required value of the <c>type</c> parameter, or null to accept any type.</param>
    /// <param name="notFilterExtracted">
    /// When true only URIs without an <c>extracted</c> parameter are considered;
    /// when false only URIs with an <c>extracted</c> parameter are considered.
    /// </param>
    /// <returns>The best matching URI, or null when no URI matches.</returns>
    public static Uri? GetRecentVersion(IEnumerable<Uri> graphUris, string? filter = null, bool notFilterExtracted = true)
    {
        // Start without a candidate: falling back to the first URI would hand callers
        // an unrelated graph (wrong type, no version) whenever nothing matches.
        Uri? currentBest = null;
        var currentBestVersion = 0L;
        foreach (var graphUri in graphUris)
        {
            // The parameters follow an "@"; turn it into "?" so they can be parsed as a query string.
            var queryDictionary = HttpUtility.ParseQueryString(new Uri(graphUri.ToString().Replace("@", "?")).Query);
            if (!long.TryParse(queryDictionary["version"], out var longVersion))
            {
                continue;
            }

            var typeMatches = filter == null || queryDictionary["type"] == filter;
            var isExtracted = queryDictionary["extracted"] != null;
            if (!typeMatches || isExtracted == notFilterExtracted)
            {
                continue;
            }

            if (currentBest is null || longVersion > currentBestVersion)
            {
                currentBestVersion = longVersion;
                currentBest = graphUri;
            }
        }
        return currentBest;
    }

    /// <summary>Returns the most recent extracted data version graph, or null when there is none.</summary>
    public static Uri? GetRecentDataExtractedVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "data", false);
    }

    /// <summary>Returns the most recent (non-extracted) data version graph, or null when there is none.</summary>
    public static Uri? GetRecentDataVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "data");
    }

    /// <summary>Returns the most recent (non-extracted) metadata version graph, or null when there is none.</summary>
    public static Uri? GetRecentMetadataVersion(IEnumerable<Uri> graphUris)
    {
        return GetRecentVersion(graphUris, "metadata");
    }

    /// <summary>
    /// Creates a new version number: the current UTC time as seconds since the Unix epoch.
    /// </summary>
    public static long GetNewVersion()
    {
        // UTC Timestamp; computed as a long so it does not overflow an int in 2038
        // and does not depend on culture-specific number formatting.
        return DateTimeOffset.UtcNow.ToUnixTimeSeconds();
    }
}
\ No newline at end of file
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<Version>0.1.4</Version></PropertyGroup> <Version>0.1.5</Version></PropertyGroup>
<ItemGroup> <ItemGroup>
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
<Description>A library generated from a OpenAPI doc</Description> <Description>A library generated from a OpenAPI doc</Description>
<Copyright>No Copyright</Copyright> <Copyright>No Copyright</Copyright>
<RootNamespace>Org.OpenAPITools</RootNamespace> <RootNamespace>Org.OpenAPITools</RootNamespace>
<Version>0.1.4</Version> <Version>0.1.5</Version>
<DocumentationFile>bin\$(Configuration)\$(TargetFramework)\Org.OpenAPITools.xml</DocumentationFile> <DocumentationFile>bin\$(Configuration)\$(TargetFramework)\Org.OpenAPITools.xml</DocumentationFile>
<RepositoryUrl>https://github.com/GIT_USER_ID/GIT_REPO_ID.git</RepositoryUrl> <RepositoryUrl>https://github.com/GIT_USER_ID/GIT_REPO_ID.git</RepositoryUrl>
<RepositoryType>git</RepositoryType> <RepositoryType>git</RepositoryType>
......