Select Git revision
RdfStoreConnector.cs
-
Petar Hristov authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
RdfStoreConnector.cs 21.19 KiB
using Coscine.Metadata.Models;
using Coscine.Metadata.Util;
using System;
using System.Collections.Generic;
using System.Linq;
using VDS.RDF;
using VDS.RDF.Query;
using VDS.RDF.Shacl;
namespace Coscine.Metadata
{
/// <summary>
/// Provides extended RDF functionality
/// </summary>
public partial class RdfStoreConnector : MetadataRdfStoreConnector
{
/// <summary>
/// URL prefix that <c>GetQuotaDefault</c> combines with a user id to form the user graph name.
/// </summary>
public readonly string UserUrlPrefix = "https://purl.org/coscine/users";
/// <summary>
/// Creates the connector and hands the SPARQL endpoint address to the base class.
/// </summary>
/// <param name="sparqlEndpoint">Endpoint address; defaults to <c>http://localhost:8890/sparql</c>.</param>
public RdfStoreConnector(string sparqlEndpoint = "http://localhost:8890/sparql") : base(sparqlEndpoint)
{
}
/// <summary>
/// Queries the endpoint for every subject typed as a SHACL <c>NodeShape</c>.
/// </summary>
/// <returns>The distinct values bound to <c>?s</c>, each converted to a <see cref="Uri"/>.</returns>
public IEnumerable<Uri> GetApplicationProfiles()
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
SELECT DISTINCT ?s
WHERE
{
?s a <http://www.w3.org/ns/shacl#NodeShape>
}";
    var shapes = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));
    // The projection is deferred: URIs are created while the caller enumerates.
    return from row in shapes
           select new Uri(row.Value("s").ToString());
}
/// <summary>
/// Finds the single <c>rdf:type</c> triple whose subject is <paramref name="graphName"/>.
/// </summary>
/// <param name="graph">Graph to search.</param>
/// <param name="graphName">URI used as the subject of the lookup.</param>
/// <returns>The only matching triple.</returns>
/// <exception cref="Exception">Thrown when the graph holds zero or more than one matching triple.</exception>
private Triple GetDefinitionTriple(IGraph graph, Uri graphName)
{
    // Materialize once so the lookup result is not enumerated again for the
    // count check, the element access and the error message.
    var triples = graph.GetTriplesWithSubjectPredicate(
            graph.CreateUriNode(graphName),
            graph.CreateUriNode(new Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")))
        .ToList();

    if (triples.Count != 1)
    {
        // Exception type and message are kept as they were for existing callers.
        throw new Exception($"The input must specify exactly 1 schema. Your input has {triples.Count} schemas.");
    }

    return triples[0];
}
/// <summary>
/// Resolves the application profile URI for <paramref name="graphName"/>: the object of its
/// single <c>rdf:type</c> triple, returned with a trailing slash.
/// </summary>
/// <param name="graph">Graph containing the type triple.</param>
/// <param name="graphName">Subject whose type triple is looked up.</param>
/// <returns>The formatted application profile URI.</returns>
public Uri GetApplicationProfileUri(IGraph graph, Uri graphName)
    => FormatApplicationProfileUrl(GetDefinitionTriple(graph, graphName));
/// <summary>
/// Collects the distinct URI objects of all <c>sh:class</c> triples in <paramref name="graph"/>.
/// </summary>
/// <param name="graph">Graph to inspect.</param>
/// <returns>A deferred sequence of distinct URIs; non-URI objects are ignored.</returns>
public IEnumerable<Uri> GetVocabularies(IGraph graph)
{
    var shaclClass = new Uri("http://www.w3.org/ns/shacl#class");
    var targets =
        from triple in graph.GetTriplesWithPredicate(shaclClass)
        where triple.Object.NodeType == NodeType.Uri
        select new Uri(triple.Object.ToString());
    return targets.Distinct();
}
/// <summary>
/// Lists the distinct classes referenced through <c>sh:class</c> in <paramref name="graph"/>.
/// </summary>
/// <param name="graph">Graph to inspect.</param>
/// <returns>A deferred sequence of distinct class URIs; objects that are not URI nodes are skipped.</returns>
public IEnumerable<Uri> GetClasses(IGraph graph)
{
    var classPredicate = new Uri("http://www.w3.org/ns/shacl#class");
    var classTriples = graph.GetTriplesWithPredicate(classPredicate);
    var uriObjects = classTriples.Where(t => t.Object.NodeType == NodeType.Uri);
    var classUris = uriObjects.Select(t => new Uri(t.Object.ToString()));
    return classUris.Distinct();
}
/// <summary>
/// Validates <paramref name="graph"/> against the SHACL shapes of an application profile.
/// </summary>
/// <param name="graph">Data to validate; it is copied, not modified.</param>
/// <param name="graphName">Subject used to look up the application profile when none is given.</param>
/// <param name="applicationProfileUri">Profile to validate against; resolved from the data when null.</param>
/// <returns>The result of the shapes graph conformance check.</returns>
public bool ValidateShacl(IGraph graph, Uri graphName, Uri? applicationProfileUri = null)
{
    // Work on a copy so the class instances merged below never reach the caller's graph.
    var data = new Graph();
    data.Merge(graph);

    applicationProfileUri ??= GetApplicationProfileUri(data, graphName);

    var shapes = GetGraph(applicationProfileUri);
    ResolveOwlImports(shapes);

    // Add the instances of every class the shapes refer to via sh:class.
    foreach (var referencedClass in GetClasses(shapes))
    {
        data.Merge(GetClassGraph(referencedClass));
    }

    return new ShapesGraph(shapes).Conforms(data);
}
/// <summary>
/// Merges every graph reachable through <c>owl:imports</c> into <paramref name="shapesGraph"/>.
/// </summary>
/// <remarks>
/// Imports are followed breadth-first. A graph is skipped when it was already visited or when
/// <c>HasGraph</c> reports that it does not exist.
/// </remarks>
/// <param name="shapesGraph">Graph that receives the imported triples.</param>
public void ResolveOwlImports(IGraph shapesGraph)
{
    var owlImports = new Uri("http://www.w3.org/2002/07/owl#imports");

    // Hash-based membership instead of a linear List.Contains scan per import.
    var visitedGraphs = new HashSet<Uri> { shapesGraph.BaseUri };

    var toAnalyzeGraphs = new Queue<IGraph>();
    toAnalyzeGraphs.Enqueue(shapesGraph);

    while (toAnalyzeGraphs.Count > 0)
    {
        var currentGraph = toAnalyzeGraphs.Dequeue();

        // Snapshot the import targets first: the first graph processed is shapesGraph
        // itself, and Merge below would otherwise change it while it is enumerated.
        var importTargets = currentGraph
            .GetTriplesWithPredicate(owlImports)
            .Select(triple => triple.Object)
            .ToList();

        foreach (var importTarget in importTargets)
        {
            if (!(importTarget is IUriNode importUriNode))
            {
                continue;
            }

            var nextGraphUri = importUriNode.Uri;
            if (visitedGraphs.Contains(nextGraphUri) || !HasGraph(nextGraphUri))
            {
                continue;
            }

            visitedGraphs.Add(nextGraphUri);
            var nextGraph = GetGraph(nextGraphUri);
            shapesGraph.Merge(nextGraph);
            toAnalyzeGraphs.Enqueue(nextGraph);
        }
    }
}
/// <summary>
/// Builds a graph of all instances of <paramref name="classElement"/>.
/// </summary>
/// <remarks>
/// The query matches subjects typed with the class itself or with any class linked to it through
/// a chain of <c>rdfs:subClassOf</c>. Each subject is emitted with all its triples plus an
/// explicit type statement for <paramref name="classElement"/>.
/// </remarks>
/// <param name="classElement">Class whose instances are collected.</param>
/// <returns>The graph produced by the CONSTRUCT query.</returns>
public IGraph GetClassGraph(Uri classElement)
{
    var construct = new SparqlParameterizedString();
    construct.CommandText = @"
CONSTRUCT {
?s a @classElement .
?s ?p ?o
}
WHERE {
?s a ?class .
?class <http://www.w3.org/2000/01/rdf-schema#subClassOf>* @classElement .
?s ?p ?o
}";
    construct.SetUri("classElement", classElement);
    return WrapRequest(() => QueryEndpoint.QueryWithResultGraph(construct.ToString()));
}
/// <summary>
/// Turns the object of <paramref name="triple"/> into a URI that ends with a slash.
/// </summary>
/// <param name="triple">Triple whose object holds the application profile address.</param>
/// <returns>The object's string form, with <c>/</c> appended when missing, as a <see cref="Uri"/>.</returns>
private static Uri FormatApplicationProfileUrl(Triple triple)
{
    var raw = triple.Object.ToString();
    var lastChar = raw[raw.Length - 1];
    var normalized = lastChar == '/' ? raw : raw + '/';
    return new Uri(normalized);
}
/// <summary>
/// Maps each subject in <paramref name="graph"/> to its <c>rdfs:label</c> in the requested language.
/// </summary>
/// <remarks>
/// When a subject has several labels in the same language, the first one encountered is kept.
/// Building the map with <c>ToDictionary</c> threw an <see cref="ArgumentException"/> in that case.
/// </remarks>
/// <param name="graph">Graph holding the label triples.</param>
/// <param name="langSuffix">Language tag a label literal must carry to be included.</param>
/// <returns>Subject string to label value.</returns>
public Dictionary<string, string> GetVocabularyLabels(IGraph graph, string langSuffix)
{
    var labels = new Dictionary<string, string>();
    var labelTriples = graph.GetTriplesWithPredicate(new Uri("http://www.w3.org/2000/01/rdf-schema#label"));

    foreach (var triple in labelTriples)
    {
        if (triple.Object.NodeType != NodeType.Literal)
        {
            continue;
        }

        var literal = (LiteralNode)triple.Object;
        if (literal.Language != langSuffix)
        {
            continue;
        }

        var subject = triple.Subject.ToString();
        if (!labels.ContainsKey(subject))
        {
            labels.Add(subject, literal.Value);
        }
    }

    return labels;
}
/// <summary>
/// Fetches the <c>rdfs:label</c> values of <paramref name="subject"/> from the endpoint.
/// </summary>
/// <param name="subject">Resource whose labels are requested.</param>
/// <returns>One triple (subject, rdfs:label, label) per distinct label found.</returns>
public IEnumerable<Triple> GetLabelForSubject(Uri subject)
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT (@baseGraph AS ?s) (rdfs:label AS ?p) ?o
WHERE {
@baseGraph rdfs:label ?o .
}";
    query.SetUri("baseGraph", subject);

    var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    var labels = new List<Triple>();
    foreach (SparqlResult row in rows)
    {
        var labelTriple = new Triple(row.Value("s"), row.Value("p"), row.Value("o"));
        labels.Add(labelTriple);
    }

    return labels;
}
/// <summary>
/// Searches for labelled subjects and returns them as (subject, rdfs:label, label) triples.
/// </summary>
/// <param name="graph">When not null, the query is restricted to this graph via <c>FROM</c>.</param>
/// <param name="obj">When not null, subjects must have some triple with this object.</param>
/// <param name="searchterm">When not null, the label must contain this text, compared in lower case.</param>
/// <param name="member">
/// When 1 and <paramref name="externalIds"/> is given, the member subquery is added. When 1, the label
/// of the URL in front of the last <c>#</c> of each subject is also appended to the result.
/// </param>
/// <param name="externalIds">Values for the <c>IN</c> filter of the member subquery.</param>
/// <param name="numberOfResults">Value bound to <c>LIMIT</c>.</param>
/// <param name="offset">Value bound to <c>OFFSET</c>.</param>
/// <returns>The matching label triples, followed by the labels of the collected base URLs.</returns>
public IEnumerable<Triple> GetTriples(Uri graph, Uri obj, string searchterm = null, int member = 0, List<string> externalIds = null, int numberOfResults = 100, int offset = 0)
{
    // Optional query fragments; each stays empty when its input was not supplied.
    var fromClause = string.Empty;
    if (graph != null)
    {
        fromClause = "FROM @g ";
    }

    var objectClause = string.Empty;
    if (obj != null)
    {
        objectClause = "?s ?p @o . ";
    }

    var searchClause = string.Empty;
    if (searchterm != null)
    {
        searchClause = "filter contains(LCASE(?o), LCASE(@searchterm)) . ";
    }

    var memberClause = string.Empty;
    if (member == 1 && externalIds != null)
    {
        memberClause = @"{
SELECT DISTINCT ?s
WHERE {
?class ?p ?memberUrl;
org:organization ?s .
{
SELECT DISTINCT ?memberUrl
WHERE {
?memberUrl ?p ?value .
FILTER(?value IN ( " + PrepareIncludeString(externalIds) + @" ))
}
}
}
}";
    }

    var query = new SparqlParameterizedString();
    query.CommandText = string.Concat(
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
        "PREFIX foaf: <http://xmlns.com/foaf/0.1/> ",
        "PREFIX org: <http://www.w3.org/ns/org#> ",
        "SELECT DISTINCT ?s (rdfs:label AS ?p) ?o ",
        fromClause,
        "WHERE { ",
        objectClause,
        "?s rdfs:label ?o . ",
        searchClause,
        memberClause,
        "} ",
        "LIMIT @numberOfResults ",
        "OFFSET @offset ");

    // Bind the parameters that were supplied.
    if (graph != null)
    {
        query.SetUri("g", graph);
    }
    if (obj != null)
    {
        query.SetUri("o", obj);
    }
    if (searchterm != null)
    {
        query.SetLiteral("searchterm", searchterm);
    }
    if (externalIds != null)
    {
        // Bound whenever ids are given, even if the member subquery was not emitted.
        for (var index = 0; index < externalIds.Count; index++)
        {
            query.SetLiteral($"externalId{index}", externalIds[index]);
        }
    }
    query.SetLiteral("numberOfResults", numberOfResults);
    query.SetLiteral("offset", offset);

    var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    var parentUrls = new HashSet<Uri>();
    var found = new List<Triple>();
    foreach (SparqlResult row in rows)
    {
        var subjectNode = row.Value("s");
        if (member == 1)
        {
            var subjectText = subjectNode.ToString();
            var hashPosition = subjectText.LastIndexOf("#");
            if (hashPosition != -1)
            {
                parentUrls.Add(new Uri(subjectText.Substring(0, hashPosition)));
            }
        }
        found.Add(new Triple(subjectNode, row.Value("p"), row.Value("o")));
    }

    // Append the label of the top level organization for each collected base URL.
    foreach (var parentUrl in parentUrls)
    {
        found.AddRange(GetLabelForSubject(parentUrl));
    }

    return found;
}
/// <summary>
/// Looks up labelled resources that have <paramref name="entityId"/> as the value of any property.
/// </summary>
/// <param name="entityId">Literal the inner query filters on.</param>
/// <returns>One triple (resource, rdfs:label, name) per result row.</returns>
public IEnumerable<Triple> GetOrganizationByEntityId(string entityId)
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?ror (rdfs:label AS ?p) ?name
WHERE {
?ror rdfs:label ?name .
{
SELECT DISTINCT ?ror
WHERE {
?ror ?p ?value .
FILTER( ?value IN ( @entityId ))
}
}
}
";
    query.SetLiteral("entityId", entityId);

    using var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    var organizations = new List<Triple>();
    foreach (SparqlResult row in rows)
    {
        var organization = new Triple(row.Value("ror"), row.Value("p"), row.Value("name"));
        organizations.Add(organization);
    }

    return organizations;
}
/// <summary>
/// Builds the placeholder list for a SPARQL <c>IN</c> filter: one <c>@externalIdN</c> per entry,
/// joined with <c>" , "</c>.
/// </summary>
/// <param name="externalIds">Only the number of entries is used; the values are bound by the caller.</param>
/// <returns>For example <c>@externalId0 , @externalId1</c>; an empty string for an empty list.</returns>
private string PrepareIncludeString(List<string> externalIds)
{
    // string.Join replaces the manual separator handling and repeated string concatenation.
    var placeholders = Enumerable.Range(0, externalIds.Count).Select(index => "@externalId" + index);
    return string.Join(" , ", placeholders);
}
/// <summary>
/// Reads the identifier of <paramref name="subject"/> that starts with <c>ikz:</c>, from the fifth
/// character on, out of the graph <c>https://ror.org/04xfq0f34</c>.
/// </summary>
/// <param name="subject">Organization whose identifier is requested.</param>
/// <returns>The string form of the first result's <c>?o</c> value.</returns>
/// <exception cref="InvalidOperationException">Thrown by <c>First()</c> when the query has no rows.</exception>
public string GetIkzForOrganization(Uri subject)
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
SELECT SUBSTR( ?o, 5 ) AS ?o
WHERE {
GRAPH <https://ror.org/04xfq0f34> {
@subject <http://www.w3.org/ns/org#identifier> ?o .
FILTER strStarts( ?o, ""ikz:"" )
}
}";
    query.SetUri("subject", subject);

    var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));
    var firstRow = rows.First();
    return firstRow.Value("o").ToString();
}
/// <summary>
/// Builds a map from every subject in the graph <c>https://ror.org/04xfq0f34</c> that has an
/// <c>ikz:</c> identifier to that identifier, taken from the fifth character on.
/// </summary>
/// <returns>Subject string to identifier string.</returns>
/// <exception cref="ArgumentException">
/// Thrown by <c>Dictionary.Add</c> when the same subject appears in more than one result row.
/// </exception>
public Dictionary<string, string> GetOrganizationToIkzMap()
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
SELECT ?s SUBSTR( ?o, 5 ) AS ?o
WHERE {
GRAPH <https://ror.org/04xfq0f34> {
?s <http://www.w3.org/ns/org#identifier> ?o .
FILTER strStarts( ?o, ""ikz:"" )
}
}";

    var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    var ikzByOrganization = new Dictionary<string, string>();
    foreach (SparqlResult row in rows)
    {
        var organization = row.Value("s").ToString();
        var ikz = row.Value("o").ToString();
        ikzByOrganization.Add(organization, ikz);
    }

    return ikzByOrganization;
}
/// <summary>
/// Finds the organization by the entity id or, failing that, by the user identifier.
/// </summary>
/// <param name="entityId">Tried first when not null.</param>
/// <param name="identifier">Used when the entity id lookup gave no result and this is not null.</param>
/// <returns>The organization found, or null when neither lookup produced one.</returns>
public string GetOrganization(string entityId, string identifier)
{
    var byEntityId = entityId == null ? null : GetOrgnizationWithEntityId(entityId);
    if (byEntityId != null)
    {
        return byEntityId;
    }

    return identifier == null ? null : GetOrgnizationWithIdentifier(identifier);
}
/// <summary>
/// Finds the organization whose <c>org:identifier</c> equals <paramref name="entityId"/>.
/// </summary>
/// <param name="entityId">Identifier literal to match.</param>
/// <returns>The organization as a string, or null unless exactly one organization matches.</returns>
public string GetOrgnizationWithEntityId(string entityId)
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
SELECT DISTINCT ?organization
WHERE
{
?organization org:identifier @entityId .
}";
    query.Namespaces.AddNamespace("org", Uris.OrgPrefix);
    query.SetLiteral("entityId", entityId);

    var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    // Zero or several matches are both treated as "not found".
    return rows.Count == 1
        ? rows.First().Value("organization").ToString()
        : null;
}
/// <summary>
/// Finds the formal organization that has a unit in which the user with the given
/// <c>foaf:openId</c> is a member.
/// </summary>
/// <param name="identifier">The user's identifier literal.</param>
/// <returns>The organization as a string, or null unless exactly one organization matches.</returns>
public string GetOrgnizationWithIdentifier(string identifier)
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
SELECT DISTINCT ?organization
WHERE
{
?organization org:hasUnit ?subOrganization .
?organization a org:FormalOrganization .
?nodeId org:organization ?subOrganization .
?nodeId org:member ?organizationMember .
?organizationMember foaf:openId @identifier .
}";
    query.Namespaces.AddNamespace("foaf", Uris.FoafPrefix);
    query.Namespaces.AddNamespace("org", Uris.OrgPrefix);
    query.SetLiteral("identifier", identifier);

    var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    // Zero or several matches are both treated as "not found".
    return rows.Count == 1
        ? rows.First().Value("organization").ToString()
        : null;
}
/// <summary>
/// Finds the contact e-mail address of the organization identified by its ROR URL.
/// </summary>
/// <param name="rorUrl">ROR URL, used both as the graph name and as the subject.</param>
/// <returns>
/// The first mailbox found with every <c>mailto:</c> occurrence removed, or null when there is no result.
/// </returns>
public string? GetOrganizationEmailByRorUrl(string rorUrl)
{
    var query = new SparqlParameterizedString();
    query.CommandText = @"
PREFIX coscineresource: <https://purl.org/coscine/terms/resource#>
SELECT ?mailbox
WHERE {
GRAPH @rorUrl {
@rorUrl coscineresource:contactInformation ?s .
?s <http://xmlns.com/foaf/0.1/mbox> ?mailbox .
}
}
";
    query.SetUri("rorUrl", new Uri(rorUrl));

    using var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    // Only the first row is used, assuming one e-mail address per organization.
    var firstRow = rows?.FirstOrDefault();
    if (firstRow is null)
    {
        return null;
    }

    // Mailbox values carry a "mailto:" scheme prefix that callers do not want.
    var mailbox = firstRow.Value("mailbox").ToString();
    return mailbox.Replace("mailto:", "");
}
/// <summary>
/// Adds a membership block to the user graph, linking the user to <paramref name="organization"/>.
/// </summary>
/// <param name="userGraph">URL used both as the target graph and as the <c>org:member</c> value.</param>
/// <param name="organization">URL of the organization.</param>
public void AddMemebershipData(string userGraph, string organization)
{
    var insert = new SparqlParameterizedString();
    insert.CommandText = @"
INSERT
{
GRAPH @userGraph
{
[
a org:Membership ;
org:member @member ;
org:organization @organization ;
]
}
}";
    insert.Namespaces.AddNamespace("org", Uris.OrgPrefix);

    // The user graph URL doubles as the member resource.
    var userUri = new Uri(userGraph);
    insert.SetUri("userGraph", userUri);
    insert.SetUri("member", userUri);
    insert.SetUri("organization", new Uri(organization));

    WrapRequest(() => QueryEndpoint.QueryRaw(insert.ToString()));
}
/// <summary>
/// Removes from the user graph every membership block that links the user to
/// <paramref name="organization"/>.
/// </summary>
/// <param name="userGraph">URL used both as the target graph and as the <c>org:member</c> value.</param>
/// <param name="organization">URL of the organization.</param>
public void RemoveMembershipData(string userGraph, string organization)
{
    var delete = new SparqlParameterizedString();
    delete.CommandText = @"
DELETE
{
GRAPH @userGraph
{
?s ?p0 ?o0 .
}
}
USING @userGraph WHERE
{
?s ?p0 ?o0 .
?s a org:Membership .
?s org:member @member .
?s org:organization @organization .
}";
    delete.Namespaces.AddNamespace("org", Uris.OrgPrefix);

    // The user graph URL doubles as the member resource.
    var userUri = new Uri(userGraph);
    delete.SetUri("userGraph", userUri);
    delete.SetUri("member", userUri);
    delete.SetUri("organization", new Uri(organization));

    WrapRequest(() => QueryEndpoint.QueryRaw(delete.ToString()));
}
/// <summary>
/// Gets the default quotas for a user, derived from the organizations the user is a member of.
/// </summary>
/// <param name="userId">User id; appended to <c>UserUrlPrefix</c> to form the user graph name.</param>
/// <returns>
/// One model per resource type with the maximum default quota and maximum default max quota.
/// The sequence is deferred, so values are parsed while it is enumerated.
/// </returns>
public IEnumerable<DefaultResourceQuotaModel> GetQuotaDefault(string userId)
{
    var userGraph = new Uri($"{UserUrlPrefix}/{userId}");

    var query = new SparqlParameterizedString();
    query.CommandText = @"
SELECT ?resourceType MAX(?dQuota) as ?defaultQuota MAX(?dMaxQuota) as ?defaultMaxQuota
WHERE
{
?bNode2 coscineresource:type ?resourceType .
?bNode2 coscineresource:defaultQuota ?dQuota .
?bNode2 coscineresource:defaultMaxQuota ?dMaxQuota .
?organization coscineresource:typeSpecification ?bNode2 .
?bNode1 org:organization ?organization .
GRAPH @userGraph
{
?bNode1 org:member @userGraph .
}
}";
    query.Namespaces.AddNamespace("org", Uris.OrgPrefix);
    query.Namespaces.AddNamespace("coscineresource", new Uri("https://purl.org/coscine/terms/resource#"));
    query.SetUri("userGraph", userGraph);

    var rows = WrapRequest(() => QueryEndpoint.QueryWithResultSet(query.ToString()));

    return rows.Select(row =>
    {
        // Keep only the part after the last '#'; the whole string when there is none.
        var typeUri = row.Value("resourceType").ToString();
        var nameStart = typeUri.LastIndexOf("#") + 1;

        return new DefaultResourceQuotaModel
        {
            ResourceType = typeUri.Substring(nameStart),
            DefaultQuota = int.Parse(row.Value("defaultQuota").ToString()),
            DefaultMaxQuota = int.Parse(row.Value("defaultMaxQuota").ToString()),
        };
    });
}
}
}