Select Git revision
RdfClient.cs
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
RdfClient.cs 21.59 KiB
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using VDS.RDF;
using VDS.RDF.Parsing;
using VDS.RDF.Query;
namespace SemanticSearchImplementation
{
/// <summary>
/// Provides all necessary queries to the RDF database to create a mapping of metadata graphs into a document.
/// </summary>
public class RdfClient
{
/// <summary>
/// Parameter name that replaces <see cref="PLACEHOLDER"/> in the construct queries built by
/// <see cref="ConstructLiteralRules"/> (inserted as <c>@instance</c>).
/// </summary>
public const string LABEL_LITERAL_RULE = "instance";
/// <summary>
/// Parameter name that replaces <see cref="PLACEHOLDER"/> in the construct queries built by
/// <see cref="ConstructAdditionalRules"/> (inserted as <c>@graph</c>).
/// </summary>
public const string LABEL_ADDITIONAL_RULE = "graph";
// Page size used by GetAllMetadataIds for its LIMIT/OFFSET queries.
private const int QUERY_LIMIT = 1000;
// Token in stored rule queries that is swapped for a query parameter before execution.
private const string PLACEHOLDER = "$this";
// Connector through which all queries and updates of this class are executed.
private readonly IRdfConnector _connector;
// Parser used by CreateFields to turn a triple's object into JSON content.
private readonly DataTypeParser _dataTypeParser;
// Language value used when filtering labels and names and handed to created literal rules.
private readonly string _language;
// Single parameterized query instance that is reused (command text and parameters overwritten)
// by the query methods of this class.
private readonly SparqlParameterizedString _queryString;
/// <summary>
/// Represents the data types used in the application profiles.
/// </summary>
public enum ApplicationProfileType
{
/// <summary>
/// Returned by <see cref="GetTypeOfProperty"/> when a class constraint exists or the data type is not unique.
/// </summary>
CLASS,
/// <summary>
/// Mapped by <see cref="GetDataType"/> from <c>XmlSpecsHelper.XmlSchemaDataTypeInteger</c>.
/// </summary>
INTEGER,
/// <summary>
/// Mapped by <see cref="GetDataType"/> from <c>XmlSpecsHelper.XmlSchemaDataTypeDateTime</c> and
/// <c>XmlSpecsHelper.XmlSchemaDataTypeDate</c>.
/// </summary>
DATE,
/// <summary>
/// Fallback of <see cref="GetDataType"/> for every data type without a dedicated mapping.
/// </summary>
STRING,
/// <summary>
/// Mapped by <see cref="GetDataType"/> from <c>XmlSpecsHelper.XmlSchemaDataTypeBoolean</c>.
/// </summary>
BOOLEAN
}
/// <summary>
/// Creates a client that executes its queries and updates through the given connector.
/// </summary>
/// <param name="client">Connector used for every query and update issued by this class.</param>
/// <param name="language">Language value used when filtering labels and names.</param>
public RdfClient(IRdfConnector client, string language)
{
_connector = client;
_language = language;
// The parser receives this instance while it is still being constructed.
// NOTE(review): presumably the parser calls back into this client - keep the assignments above first.
_dataTypeParser = new DataTypeParser(this);
_queryString = new SparqlParameterizedString();
}
/// <summary>
/// Gets the language this client was created with.
/// </summary>
/// <returns>The language value passed to the constructor.</returns>
public string GetLanguage()
{
    return _language;
}
/// <summary>
/// Looks up the type (<c>a</c>) of a metadata graph inside that graph; the type is treated as its application profile.
/// </summary>
/// <remarks>
/// Only the first result row is used (the query carries <c>LIMIT 1</c>). An empty result makes
/// <c>First()</c> fail instead of returning <c>null</c>.
/// </remarks>
/// <param name="graphName">ID (absolute URI) of the metadata graph.</param>
/// <returns>String representation of the application profile URI.</returns>
public string GetApplicationProfileOfMetadata(string graphName)
{
    _queryString.CommandText = $@"SELECT ?applicationProfile FROM @uri WHERE {{
@uri a ?applicationProfile
}} LIMIT 1";
    var graphUri = new Uri(graphName);
    _queryString.SetUri("uri", graphUri);

    var firstRow = _connector.QueryWithResultSet(_queryString).First();
    var profileNode = firstRow.Value("applicationProfile");
    return profileNode.ToString();
}
/// <summary>
/// Flags a metadata graph as deleted by inserting an "is deleted" triple with the value <c>true</c> into it.
/// </summary>
/// <remarks>The graph URI is used both as the target graph and as the subject of the inserted triple.</remarks>
/// <param name="graphName">ID (absolute URI) of the metadata graph.</param>
public void MarkGraphAsDeleted(string graphName)
{
    _queryString.CommandText = $@"INSERT INTO @uri {{
@uri {Uris.COSCINE_SEARCH_IS_DELETED} true
}}";
    var graphUri = new Uri(graphName);
    _queryString.SetUri("uri", graphUri);

    _connector.Update(_queryString);
}
/// <summary>
/// Retrieves the classes an instance is directly typed with (no inference, no class hierarchy).
/// </summary>
/// <param name="instance">String representation of the instance URI.</param>
/// <returns>A lazily projected sequence of class URIs as strings.</returns>
public IEnumerable<string> GetDirectClasses(string instance)
{
    _queryString.CommandText = $@"SELECT ?class WHERE {{
@uri a ?class
}}";
    var instanceUri = new Uri(instance);
    _queryString.SetUri("uri", instanceUri);

    var rows = _connector.QueryWithResultSet(_queryString);
    return from row in rows
           select row.Value("class").ToString();
}
/// <summary>
/// Queries the direct parent classes (subclass-of relation) of the given classes.
/// </summary>
/// <remarks>
/// The class list is enumerated exactly once. When it is empty no query is sent and an empty list
/// is returned, because an empty <c>IN ()</c> filter cannot match anything.
/// </remarks>
/// <param name="classes">A list of classes (string representation of the URIs).</param>
/// <returns>A list of the parent classes; empty when no classes were given or none has a parent.</returns>
public IList<string> GetParentClasses(IEnumerable<string> classes)
{
    // NOTE(review): the URIs are interpolated into the query unescaped - callers must pass well-formed URIs.
    var classTerms = classes.Select(x => $"<{x}>").ToList();
    if (classTerms.Count == 0)
    {
        // Nothing to look up: skip the round trip instead of sending "FILTER (?class IN ())".
        return new List<string>();
    }

    var filterClasses = String.Join(",", classTerms);
    var results = _connector.QueryWithResultSet($@"SELECT * WHERE {{
?class {Uris.RDFS_SUBCLASS_OF} ?parent .
FILTER (?class IN ({filterClasses}))
}}", false);
    return results.Select(x => x.Value("parent").ToString()).ToList();
}
/// <summary>
/// Collects the distinct property paths (metadata fields) declared by the property shapes of all node shapes.
/// </summary>
/// <returns>A lazily projected sequence of property URIs as strings.</returns>
public IEnumerable<string> GetProperties()
{
    var rows = _connector.QueryWithResultSet($@"SELECT DISTINCT ?property WHERE {{
?profile a {Uris.SH_NODE_SHAPE} .
?profile {Uris.SH_PROPERTY} ?profile_property .
?profile_property {Uris.SH_PATH} ?property
}}");
    return from row in rows
           select row.Value("property").ToString();
}
/// <summary>
/// Enumerates the IDs of all metadata graphs page by page.
/// </summary>
/// <remarks>
/// This is an iterator: the count query and each page query only run while the result is enumerated.
/// Pages hold at most <c>QUERY_LIMIT</c> IDs.
/// NOTE(review): the page queries have no ORDER BY, so page stability depends on the store - confirm.
/// </remarks>
/// <returns>A sequence of pages, each page being a sequence of metadata graph IDs.</returns>
public IEnumerable<IEnumerable<string>> GetAllMetadataIds()
{
    var countResult = _connector.QueryWithResultSet($@"SELECT COUNT(?g) AS ?count {{
GRAPH ?g {{
?g a ?profile
}}
?profile a {Uris.SH_NODE_SHAPE}
}}");
    var countNode = (ILiteralNode)countResult.First().Value("count");
    var total = Convert.ToInt32(countNode.Value);

    // Walk through the graphs in pages because every query is capped by QUERY_LIMIT.
    var offset = 0;
    while (offset < total)
    {
        var page = _connector.QueryWithResultSet($@"SELECT ?g {{
GRAPH ?g {{
?g a ?profile
}}
?profile a {Uris.SH_NODE_SHAPE}
}} LIMIT {QUERY_LIMIT} OFFSET {offset}");
        yield return page.Select(row => row.Value("g").ToString());
        offset += QUERY_LIMIT;
    }
}
/// <summary>
/// Looks up one label of a URI, restricted by the language filter of the query
/// (empty language tag or the language of this client).
/// </summary>
/// <param name="uri">A string representation of a URI.</param>
/// <returns>The label without its language tag, or <c>null</c> when the query yields no row.</returns>
public string GetRdfsLabel(string uri)
{
    _queryString.CommandText = $@"SELECT ?stripped_label WHERE {{
@uri {Uris.RDFS_LABEL} ?label .
FILTER (langmatches(lang(?label), '') || langmatches(lang(?label), '{_language}') ) .
BIND (STR(?label) AS ?stripped_label)
}} LIMIT 1";
    _queryString.SetUri("uri", new Uri(uri));

    var rows = _connector.QueryWithResultSet(_queryString);
    if (rows.IsEmpty)
    {
        return null;
    }

    var labelNode = rows.First().Value("stripped_label");
    return labelNode.ToString();
}
/// <summary>
/// Retrieves the projects a user is a member of.
/// </summary>
/// <remarks>The query only matches when the user URI is also typed as a person.</remarks>
/// <param name="user">String representation of the user URI; may be <c>null</c> or empty.</param>
/// <returns>
/// A sequence of project IDs; an empty list when <paramref name="user"/> is <c>null</c> or empty.
/// </returns>
public IEnumerable<string> GetProjectsOfUser(string user)
{
    var hasUser = !String.IsNullOrEmpty(user);
    if (hasUser)
    {
        _queryString.CommandText = $@"SELECT ?project WHERE {{
@uri {Uris.COSCINE_PROJECTSTRUCTURE_IS_MEMBER_OF} ?project ;
a {Uris.FOAF_PERSON}
}}";
        _queryString.SetUri("uri", new Uri(user));

        var rows = _connector.QueryWithResultSet(_queryString);
        return rows.Select(row => row.Value("project").ToString());
    }

    return new List<string>();
}
/// <summary>
/// Retrieves the distinct names given to a property by the property shapes that use it as their path.
/// </summary>
/// <remarks>
/// Only names whose language equals the language of this client, or that carry no language, are kept.
/// The filtering happens lazily while the result is enumerated; every name is expected to be a literal.
/// </remarks>
/// <param name="property">A string representation of a property URI.</param>
/// <returns>A sequence of names.</returns>
public IEnumerable<string> GetApplicationProfilesNamesOfProperty(string property)
{
    _queryString.CommandText = $@"SELECT DISTINCT ?name WHERE {{
?profile_property {Uris.SH_PATH} @uri .
?profile_property {Uris.SH_NAME} ?name
}}";
    _queryString.SetUri("uri", new Uri(property));

    var rows = _connector.QueryWithResultSet(_queryString);
    return rows
        .Select(row => (ILiteralNode)row.Value("name"))
        .Where(literal => literal.Language == _language || literal.Language == String.Empty)
        .Select(literal => literal.Value);
}
/// <summary>
/// Determines the application profile data type of a property across all node shapes that use it.
/// </summary>
/// <remarks>
/// A property that has a class constraint, no data type at all, or several different data types
/// (due to different profiles) is reported as <see cref="ApplicationProfileType.CLASS"/>, which is mapped to text.
/// </remarks>
/// <param name="property">String representation of the property URI.</param>
/// <returns>The mapped data type when exactly one data type and no class is found; otherwise CLASS.</returns>
public ApplicationProfileType GetTypeOfProperty(string property)
{
    _queryString.CommandText = $@"SELECT DISTINCT ?type, ?class WHERE {{
?profile {Uris.SH_PROPERTY} ?profile_property .
?profile_property {Uris.SH_PATH} @uri .
?profile a {Uris.SH_NODE_SHAPE} .
{{
?profile_property {Uris.SH_CLASS} ?class .
}}
UNION
{{
?profile_property {Uris.SH_DATATYPE} ?type .
}}
}}";
    _queryString.SetUri("uri", new Uri(property));
    var rows = _connector.QueryWithResultSet(_queryString);

    // Each row binds either ?class or ?type (UNION), so both columns are collected separately.
    var classes = (from row in rows
                   let classNode = row.Value("class")
                   where classNode != null
                   select classNode.ToString()).ToList();
    var datatypes = (from row in rows
                     let typeNode = row.Value("type")
                     where typeNode != null
                     select typeNode.ToString()).ToList();

    var hasUniqueDataType = classes.Count == 0 && datatypes.Count == 1;
    if (!hasUniqueDataType)
    {
        // class or ambiguous data type: classes will be mapped to text
        return ApplicationProfileType.CLASS;
    }

    return GetDataType(datatypes[0]);
}
/// <summary>
/// Translates an XSD data type URI into the matching application profile type.
/// </summary>
/// <remarks>Both the date and the date-time type map to DATE; anything unknown (including <c>null</c>) maps to STRING.</remarks>
/// <param name="type">XSD data type.</param>
/// <returns>An application profile type.</returns>
public ApplicationProfileType GetDataType(string type)
{
    if (type == XmlSpecsHelper.XmlSchemaDataTypeBoolean)
    {
        return ApplicationProfileType.BOOLEAN;
    }

    if (type == XmlSpecsHelper.XmlSchemaDataTypeInteger)
    {
        return ApplicationProfileType.INTEGER;
    }

    if (type == XmlSpecsHelper.XmlSchemaDataTypeDateTime || type == XmlSpecsHelper.XmlSchemaDataTypeDate)
    {
        return ApplicationProfileType.DATE;
    }

    return ApplicationProfileType.STRING;
}
/// <summary>
/// Reads the index version stored in the current-index graph.
/// </summary>
/// <remarks>The first result row is used; an empty result makes <c>First()</c> fail.</remarks>
/// <returns>Number of the current index version.</returns>
public int GetCurrentIndexVersion()
{
    var rows = _connector.QueryWithResultSet($@"SELECT ?version FROM <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> WHERE {{
<{Uris.COSCINE_SEARCH_CURRENT_INDEX}> <{Uris.COSCINE_SEARCH_HAS_INDEX_VERSION}> ?version
}}", false);

    var versionNode = (ILiteralNode)rows.First().Value("version");
    return Convert.ToInt32(versionNode.Value);
}
/// <summary>
/// Replaces the stored index version: removes every existing version triple from the
/// current-index graph and inserts the new value.
/// </summary>
/// <remarks>
/// NOTE(review): the DELETE and INSERT operations are sent as one request without a separating
/// <c>;</c> - confirm the target store accepts this form.
/// </remarks>
/// <param name="newVersion">New index version.</param>
public void SetCurrentIndexVersion(int newVersion)
{
    // Remove the old version and write the new one in a single update request.
    _queryString.CommandText = $@"DELETE WHERE {{
GRAPH <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> {{
<{Uris.COSCINE_SEARCH_CURRENT_INDEX}> <{Uris.COSCINE_SEARCH_HAS_INDEX_VERSION}> ?version
}}
}}
INSERT DATA {{
GRAPH <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> {{
<{Uris.COSCINE_SEARCH_CURRENT_INDEX}> <{Uris.COSCINE_SEARCH_HAS_INDEX_VERSION}> @literal
}}
}}";

    // The version is bound as a literal parameter rather than interpolated into the text.
    _queryString.SetLiteral("literal", newVersion);

    _connector.Update(_queryString);
}
/// <summary>
/// Builds the literal rules defined in a named graph, keyed by the class each rule is attached to.
/// </summary>
/// <remarks>
/// In every rule query the <c>$this</c> placeholder is replaced by the <c>@instance</c> parameter and the
/// prefixes declared for the rule are registered as namespaces. A class that occurs in more than one
/// result row causes an <see cref="ArgumentException"/> because keys must be unique.
/// </remarks>
/// <param name="graphName">Name of the graph.</param>
/// <returns>A dictionary containing the classes (key) and corresponding literal rules (value) of the graph.</returns>
public Dictionary<string, LiteralRule> ConstructLiteralRules(string graphName)
{
    _queryString.CommandText = $@"SELECT ?class, ?construct, ?prefixName FROM @uri WHERE {{
?class {Uris.SH_RULE} ?rule .
?rule {Uris.SH_CONSTRUCT} ?construct .
?rule {Uris.SH_PREFIXES} ?prefixName .
?rule a {Uris.SH_SPARQL_RULE} .
OPTIONAL {{
?rule {Uris.SH_ORDER} ?order
}}
}} ORDER BY ?order";
    _queryString.SetUri("uri", new Uri(graphName));
    var rows = _connector.QueryWithResultSet(_queryString);

    var rulesByClass = new Dictionary<string, LiteralRule>();
    foreach (var row in rows)
    {
        var targetClass = row.Value("class").ToString();

        // Each rule gets its own parameterized query; the shared one is reused by the prefix lookup below.
        var ruleQuery = new SparqlParameterizedString
        {
            CommandText = row.Value("construct").ToString().Replace(PLACEHOLDER, $"@{LABEL_LITERAL_RULE}")
        };

        var prefixName = row.Value("prefixName").ToString();
        foreach (var declaration in GetPrefixesOfGraph(graphName, prefixName))
        {
            ruleQuery.Namespaces.AddNamespace(declaration.Key, new Uri(declaration.Value));
        }

        rulesByClass.Add(targetClass, new LiteralRule(_connector, ruleQuery, _language));
    }

    return rulesByClass;
}
/// <summary>
/// Reads the prefix declarations that a prefix rule (an ontology resource) makes inside a graph.
/// </summary>
/// <remarks>A prefix that is declared more than once causes an <see cref="ArgumentException"/>.</remarks>
/// <param name="graphName">Name of the graph.</param>
/// <param name="prefixName">Name (URI) of the prefix rule.</param>
/// <returns>A dictionary containing the prefixes (keys) and namespaces (value) of a prefix rule.</returns>
private IDictionary<string, string> GetPrefixesOfGraph(string graphName, string prefixName)
{
    _queryString.CommandText = $@"SELECT ?prefix, STR(?namespace) AS ?namespace FROM @graph WHERE {{
@prefix {Uris.SH_DECLARE} ?prefixRule .
?prefixRule {Uris.SH_PREFIX} ?prefix .
?prefixRule {Uris.SH_NAMESPACE} ?namespace .
@prefix a {Uris.OWL_ONTOLOGY} .
}}";
    _queryString.SetUri("graph", new Uri(graphName));
    _queryString.SetUri("prefix", new Uri(prefixName));

    var rows = _connector.QueryWithResultSet(_queryString);

    var namespacesByPrefix = new Dictionary<string, string>();
    foreach (var row in rows)
    {
        var prefix = row.Value("prefix").ToString();
        var namespaceUri = row.Value("namespace").ToString();
        namespacesByPrefix.Add(prefix, namespaceUri);
    }

    return namespacesByPrefix;
}
/// <summary>
/// Queries all additional rules of a specific named graph.
/// </summary>
/// <remarks>
/// In every rule query the <c>$this</c> placeholder is replaced by the <c>@graph</c> parameter and the
/// prefixes declared for the rule are registered as namespaces.
/// The rules are built eagerly and returned as a materialized list. Building them issues further
/// prefix queries through the shared query object, so a deferred sequence would repeat those queries
/// and create new rule instances on every enumeration.
/// </remarks>
/// <param name="graphName">Name of the graph.</param>
/// <returns>The additional rules of the graph, in query result order.</returns>
public IEnumerable<AdditionalRule> ConstructAdditionalRules(string graphName)
{
    _queryString.CommandText = $@"SELECT ?construct, ?prefixName FROM @uri WHERE {{
@uri {Uris.SH_RULE} ?rule .
?rule {Uris.SH_CONSTRUCT} ?construct .
?rule {Uris.SH_PREFIXES} ?prefixName .
?rule a {Uris.SH_SPARQL_RULE} .
OPTIONAL {{
?rule {Uris.SH_ORDER} ?order
}}
}} ORDER BY ?order";
    _queryString.SetUri("uri", new Uri(graphName));
    var results = _connector.QueryWithResultSet(_queryString);

    return results.Select(x =>
    {
        // Each rule gets its own parameterized query; the shared one is reused by the prefix lookup below.
        SparqlParameterizedString queryString = new SparqlParameterizedString();
        queryString.CommandText = x.Value("construct").ToString().Replace(PLACEHOLDER, $"@{LABEL_ADDITIONAL_RULE}");
        var prefixName = x.Value("prefixName").ToString();
        foreach (var prefixResult in GetPrefixesOfGraph(graphName, prefixName))
        {
            queryString.Namespaces.AddNamespace(prefixResult.Key, new Uri(prefixResult.Value));
        }
        return new AdditionalRule(_connector, queryString);
    }).ToList(); // materialize once so the prefix queries are not repeated per enumeration
}
/// <summary>
/// Derives a label from the textual form of a URI.
/// </summary>
/// <remarks>
/// The text after the last <c>#</c> wins. Without a <c>#</c>, the last <c>/</c>-separated segment is used,
/// or the one before it when the URI ends with a slash. Every call is logged to the console.
/// </remarks>
/// <param name="element">String representation of a URI.</param>
/// <returns>Guessed label, or <c>null</c> when the text contains neither <c>#</c> nor <c>/</c>.</returns>
public string GuessLabel(string element)
{
    Console.WriteLine($"Guess label for {element}");

    // Fragment identifier: everything behind the last '#'.
    var fragmentStart = element.LastIndexOf('#');
    if (fragmentStart >= 0)
    {
        return element.Substring(fragmentStart + 1);
    }

    // A single segment means there is no '/' to split on.
    var segments = element.Split('/');
    if (segments.Length < 2)
    {
        Console.WriteLine($"No label could be guessed for {element}");
        return null;
    }

    // A trailing slash leaves an empty last segment; fall back to the one before it.
    var tail = segments[segments.Length - 1];
    return String.IsNullOrEmpty(tail) ? segments[segments.Length - 2] : tail;
}
/// <summary>
/// Fetches a graph through the connector and exposes its triples.
/// </summary>
/// <param name="graphName">ID of the graph.</param>
/// <returns>The triple collection of the fetched graph.</returns>
public IEnumerable<Triple> GetTriplesFromGraph(string graphName) =>
    _connector.GetGraph(graphName).Triples;
/// <summary>
/// Creates additional key value pairs for a metadata graph resulting from the execution of the additional rules.
/// </summary>
/// <remarks>
/// Additional rules can influence other existing metadata graphs. When <paramref name="changeOtherDocs"/>
/// is <c>false</c>, only the entry of <paramref name="graphName"/> itself is converted; if the rules
/// produced triples for other graphs only, the result is empty instead of failing on a missing key.
/// </remarks>
/// <param name="graphName">ID of metadata graph.</param>
/// <param name="profile">Application profile of metadata graph.</param>
/// <param name="indexMapper">The <c>ElasticsearchIndexMapper</c>.</param>
/// <param name="changeOtherDocs">Flag which indicates if fields for influenced documents should be created.</param>
/// <returns>A dictionary containing the IDs of the metadata graphs (key) and the corresponding JSON objects (value).</returns>
public IDictionary<string, JObject> CreateFieldsFromAdditionalRule(string graphName, SpecificApplicationProfile profile, ElasticsearchIndexMapper indexMapper, bool changeOtherDocs = false)
{
    IDictionary<string, JObject> jObjects = new Dictionary<string, JObject>();
    var documents = profile.GetAdditionalTriples(graphName);

    if (changeOtherDocs)
    {
        // Convert the triples of every graph the rules touched.
        foreach (var graph in documents.Keys)
        {
            jObjects.Add(graph, CreateFields(documents[graph], profile, indexMapper));
        }
    }
    else if (documents.ContainsKey(graphName))
    {
        // Only the requested graph is of interest; it may be absent even when other graphs were affected.
        jObjects.Add(graphName, CreateFields(documents[graphName], profile, indexMapper));
    }

    return jObjects;
}
/// <summary>
/// Converts triples into one JSON object by merging the parsed content of every indexable triple.
/// </summary>
/// <remarks>
/// Type triples are ignored. Triples whose property has no label, or whose object cannot be indexed,
/// are skipped with a console message.
/// </remarks>
/// <param name="triples">A sequence of triples which needs to be parsed.</param>
/// <param name="profile">The application profile which the triples belong to.</param>
/// <param name="indexMapper">The <c>ElasticsearchIndexMapper</c>.</param>
/// <returns>The merged JSON object; empty when no triple could be indexed.</returns>
public JObject CreateFields(IEnumerable<Triple> triples, SpecificApplicationProfile profile, ElasticsearchIndexMapper indexMapper)
{
    var merged = new JObject();

    foreach (var triple in triples)
    {
        var property = triple.Predicate.ToString();
        var isTypeTriple = String.Equals(property, Uris.RDF_TYPE_LONG);
        if (!isTypeTriple)
        {
            var label = indexMapper.GetLabelOfProperty(property);
            if (String.IsNullOrEmpty(label))
            {
                Console.WriteLine($"Property {property} could not be indexed because no label was found.");
            }
            else
            {
                try
                {
                    var parsed = _dataTypeParser.Parse(label, triple.Object, indexMapper, profile);
                    merged.Merge(parsed);
                }
                catch (NotIndexableException e)
                {
                    // Best effort: a single non-indexable value must not abort the whole document.
                    Console.WriteLine($"Property {property} could not be indexed. Reason: {e.Reason}");
                }
            }
        }
    }

    return merged;
}
}
}