Skip to content
Snippets Groups Projects
Select Git revision
  • master
  • gitkeep
  • Sprint/2022-01
  • dev protected
4 results

RdfClient.cs

Blame
  • user avatar
    sarahbensberg authored
    Command line program with functions to use Elasticsearch as a search engine for RDF-based metadata graphs in the context of CoScInE
    adbfea60
    History
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    RdfClient.cs 21.59 KiB
    using Newtonsoft.Json.Linq;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using VDS.RDF;
    using VDS.RDF.Parsing;
    using VDS.RDF.Query;
    
    namespace SemanticSearchImplementation
    {
        /// <summary>
        /// Provides all necessary queries to the RDF database to create a mapping of metadata graphs into a document.
        /// </summary>
        public class RdfClient 
        {
            /// <summary>Parameter name that replaces the <c>$this</c> placeholder in the construct query of a literal rule.</summary>
            public const string LABEL_LITERAL_RULE = "instance";
            /// <summary>Parameter name that replaces the <c>$this</c> placeholder in the construct query of an additional rule.</summary>
            public const string LABEL_ADDITIONAL_RULE = "graph";
    
            // Page size (LIMIT) used when the metadata graph IDs are fetched page by page.
            private const int QUERY_LIMIT = 1000;
            // Token inside stored rule queries that is swapped for a SPARQL parameter name.
            private const string PLACEHOLDER = "$this";
    
            // Connector through which all queries and updates of this class are sent.
            private readonly IRdfConnector _connector;
            // Parser used by CreateFields to convert the object of a triple; it is constructed with this client.
            private readonly DataTypeParser _dataTypeParser;
            // Language used to filter labels and names; also handed to every created LiteralRule.
            private readonly string _language;
            // One parameterized query instance shared by the query methods; each of them overwrites CommandText
            // and sets its own parameters before use.
            private readonly SparqlParameterizedString _queryString;
    
            /// <summary>
            /// Represents the data types used in the application profiles.
            /// </summary>
            /// <remarks>
            /// <c>GetDataType</c> maps XML Schema data types to these values and
            /// <c>GetTypeOfProperty</c> falls back to <c>CLASS</c>.
            /// </remarks>
            public enum ApplicationProfileType
            {
                // Returned by GetTypeOfProperty for properties with a class or without a unique data type.
                CLASS,
                // XML Schema integer.
                INTEGER,
                // XML Schema date and dateTime.
                DATE,
                // Every data type that GetDataType does not map to one of the other values.
                STRING,
                // XML Schema boolean.
                BOOLEAN
            }
    
            /// <summary>
            /// Creates a client that sends its queries through the given connector.
            /// </summary>
            /// <param name="client">Connector used for all queries and updates.</param>
            /// <param name="language">Language used when labels and names are filtered.</param>
            public RdfClient(IRdfConnector client, string language)
            {
                this._connector = client;
                this._language = language;

                // The parser receives this client instance, so connector and language are assigned first.
                this._dataTypeParser = new DataTypeParser(this);
                this._queryString = new SparqlParameterizedString();
            }
    
            /// <summary>
            /// Returns the language this client was created with.
            /// </summary>
            /// <returns>The language value passed to the constructor.</returns>
            public string GetLanguage()
            {
                return _language;
            }
    
            /// <summary>
            /// Looks up the application profile that a metadata graph is typed with.
            /// </summary>
            /// <remarks>
            /// Only the first solution of the query is used. <c>First()</c> is called on the result,
            /// so an empty result is not handled by this method.
            /// </remarks>
            /// <param name="graphName">ID of the metadata graph.</param>
            /// <returns>String representation of the application profile URI.</returns>
            public string GetApplicationProfileOfMetadata(string graphName)
            {
                _queryString.CommandText = $@"SELECT ?applicationProfile FROM @uri WHERE {{
                    @uri a ?applicationProfile 
                }} LIMIT 1";
                var graphUri = new Uri(graphName);
                _queryString.SetUri("uri", graphUri);

                var firstRow = _connector.QueryWithResultSet(_queryString).First();
                var profileNode = firstRow.Value("applicationProfile");
                return profileNode.ToString();
            }
    
            /// <summary>
            /// Flags a metadata graph as deleted by inserting an "is deleted" triple with the value
            /// <c>true</c> into that graph; the graph URI is also the subject of the triple.
            /// </summary>
            /// <param name="graphName">ID of the metadata graph.</param>
            public void MarkGraphAsDeleted(string graphName)
            {
                _queryString.CommandText = $@"INSERT INTO @uri {{
                    @uri {Uris.COSCINE_SEARCH_IS_DELETED} true
                }}";

                var deletedGraph = new Uri(graphName);
                _queryString.SetUri("uri", deletedGraph);

                _connector.Update(_queryString);
            }
    
            /// <summary>
            /// Queries the classes an instance is directly typed with (no inference, no hierarchy).
            /// </summary>
            /// <param name="instance">String representation of an instance URI.</param>
            /// <returns>The string representations of the class URIs, projected lazily from the query result.</returns>
            public IEnumerable<string> GetDirectClasses(string instance)
            {
                _queryString.CommandText = $@"SELECT ?class WHERE {{
                    @uri a ?class 
                }}";
                _queryString.SetUri("uri", new Uri(instance));

                // The query itself runs right here; only the projection to strings is deferred.
                return from row in _connector.QueryWithResultSet(_queryString)
                       select row.Value("class").ToString();
            }
    
            /// <summary>
            /// Queries the parent classes (<c>rdfs:subClassOf</c>) of the given classes.
            /// </summary>
            /// <remarks>
            /// The class URIs are inserted into the query text as <c>&lt;uri&gt;</c> terms, so they are
            /// expected to be well-formed URIs. An empty input returns an empty list without querying.
            /// </remarks>
            /// <param name="classes">A list of classes (string representation of the URIs).</param>
            /// <returns>A list of the parent classes.</returns>
            public IList<string> GetParentClasses(IEnumerable<string> classes)
            {
                // Materialize once so the input is enumerated a single time.
                var classTerms = classes.Select(x => $"<{x}>").ToList();
                if (classTerms.Count == 0)
                {
                    // Nothing to look up: avoid sending a query with an empty IN () list.
                    return new List<string>();
                }

                var filterClasses = String.Join(",", classTerms);
                var results = _connector.QueryWithResultSet($@"SELECT * WHERE {{
                    ?class {Uris.RDFS_SUBCLASS_OF} ?parent .
                    FILTER (?class IN ({filterClasses}))
                }}", false);
                return results.Select(x => x.Value("parent").ToString()).ToList();
            }
    
            /// <summary>
            /// Queries the distinct properties (metadata fields) referenced via <c>sh:path</c> by the
            /// property shapes of all node shapes.
            /// </summary>
            /// <returns>An enumerator of the properties (string representation of the URI).</returns>
            public IEnumerable<string> GetProperties()
            {
                var rows = _connector.QueryWithResultSet($@"SELECT DISTINCT ?property WHERE {{
                    ?profile a {Uris.SH_NODE_SHAPE} . 
                    ?profile {Uris.SH_PROPERTY} ?profile_property . 
                    ?profile_property {Uris.SH_PATH} ?property 
                }}");

                return from row in rows
                       select row.Value("property").ToString();
            }
    
            /// <summary>
            /// Queries the IDs of all available metadata graphs in the knowledge graph, page by page.
            /// </summary>
            /// <remarks>
            /// A count query determines how many pages are requested; every page is fetched with
            /// <c>LIMIT</c>/<c>OFFSET</c> when the iterator advances.
            /// NOTE(review): the paged query has no <c>ORDER BY</c>; whether pages are stable across
            /// requests depends on the store - confirm.
            /// </remarks>
            /// <returns>A sequence of pages, each page being an enumerator of metadata graph IDs.</returns>
            public IEnumerable<IEnumerable<string>> GetAllMetadataIds()
            {
                var countRow = _connector.QueryWithResultSet($@"SELECT COUNT(?g) AS ?count {{ 
                    GRAPH ?g {{
                        ?g a ?profile
                    }}
                    ?profile a {Uris.SH_NODE_SHAPE}
                }}").First();

                var countNode = (ILiteralNode)countRow.Value("count");
                var total = Convert.ToInt32(countNode.Value);

                // One query per page because a query limit exists.
                var offset = 0;
                while (offset < total)
                {
                    var page = _connector.QueryWithResultSet($@"SELECT ?g {{ 
                        GRAPH ?g {{
                            ?g a ?profile
                        }}
                        ?profile a {Uris.SH_NODE_SHAPE}
                    }} LIMIT {QUERY_LIMIT} OFFSET {offset}");
                    yield return page.Select(row => row.Value("g").ToString());

                    offset += QUERY_LIMIT;
                }
            }
    
            /// <summary>
            /// Queries one <c>rdfs:label</c> of a URI, restricted by a language filter built from the
            /// empty string and the configured language.
            /// </summary>
            /// <param name="uri">A string representation of a URI.</param>
            /// <returns>The label without language tag, or <c>null</c> when the query yields no result.</returns>
            public string GetRdfsLabel(string uri)
            {
                _queryString.CommandText = $@"SELECT ?stripped_label WHERE {{                
                    @uri {Uris.RDFS_LABEL} ?label .                
                    FILTER (langmatches(lang(?label), '') || langmatches(lang(?label), '{_language}') ) .
                    BIND (STR(?label) AS ?stripped_label)
                }} LIMIT 1";
                _queryString.SetUri("uri", new Uri(uri));

                var rows = _connector.QueryWithResultSet(_queryString);
                return rows.IsEmpty
                    ? null
                    : rows.First().Value("stripped_label").ToString();
            }
    
            /// <summary>
            /// Queries the projects a user is a member of.
            /// </summary>
            /// <param name="user">String representation of the user URI; may be null or empty.</param>
            /// <returns>
            /// The projects of the user, or an empty list when <paramref name="user"/> is null or empty.
            /// </returns>
            public IEnumerable<string> GetProjectsOfUser(string user)
            {
                var hasUser = !String.IsNullOrEmpty(user);
                if (!hasUser)
                {
                    // No user, no query.
                    return new List<string>();
                }

                _queryString.CommandText = $@"SELECT ?project WHERE {{
                    @uri {Uris.COSCINE_PROJECTSTRUCTURE_IS_MEMBER_OF} ?project ;
                             a {Uris.FOAF_PERSON} 
                }}";
                var userUri = new Uri(user);
                _queryString.SetUri("uri", userUri);

                return from row in _connector.QueryWithResultSet(_queryString)
                       select row.Value("project").ToString();
            }
    
            /// <summary>
            /// Queries the <c>sh:name</c> values that property shapes declare for a property.
            /// </summary>
            /// <remarks>
            /// Only names whose language equals the configured language or that carry no language
            /// are returned. Filtering happens lazily while the result is enumerated.
            /// </remarks>
            /// <param name="property">A string representation of a property URI.</param>
            /// <returns>An enumerator of names.</returns>
            public IEnumerable<string> GetApplicationProfilesNamesOfProperty(string property)
            {
                _queryString.CommandText = $@"SELECT DISTINCT ?name WHERE {{
                    ?profile_property {Uris.SH_PATH} @uri .
                    ?profile_property {Uris.SH_NAME} ?name 
                }}";
                _queryString.SetUri("uri", new Uri(property));

                return _connector.QueryWithResultSet(_queryString)
                    .Select(row => (ILiteralNode)row.Value("name"))
                    .Where(name => name.Language == _language || name.Language == String.Empty)
                    .Select(name => name.Value);
            }
    
            /// <summary>
            /// Determines the application profile data type of a property.
            /// </summary>
            /// <remarks>
            /// A concrete data type is returned only when no profile declares a class for the property
            /// and exactly one distinct data type is declared. In every other case (class, several
            /// data types, nothing declared) the property is treated as a class, which maps to text.
            /// </remarks>
            /// <param name="property">String representation of a property URI.</param>
            /// <returns>The application profile type of the property.</returns>
            public ApplicationProfileType GetTypeOfProperty(string property)
            {
                _queryString.CommandText = $@"SELECT DISTINCT ?type, ?class WHERE {{
                    ?profile {Uris.SH_PROPERTY} ?profile_property .
                    ?profile_property {Uris.SH_PATH} @uri .
                    ?profile a {Uris.SH_NODE_SHAPE} .
                    {{
                        ?profile_property {Uris.SH_CLASS} ?class .
                    }}
                    UNION 
                    {{
                        ?profile_property {Uris.SH_DATATYPE} ?type .
                    }}
                }}";
                _queryString.SetUri("uri", new Uri(property));

                // Each row binds either ?class or ?type (UNION), so both columns are collected separately.
                var classUris = new List<string>();
                var datatypeUris = new List<string>();
                foreach (var row in _connector.QueryWithResultSet(_queryString))
                {
                    var classNode = row.Value("class");
                    if (classNode != null)
                    {
                        classUris.Add(classNode.ToString());
                    }

                    var typeNode = row.Value("type");
                    if (typeNode != null)
                    {
                        datatypeUris.Add(typeNode.ToString());
                    }
                }

                var hasUniqueDatatype = classUris.Count == 0 && datatypeUris.Count == 1;
                return hasUniqueDatatype
                    ? GetDataType(datatypeUris[0])
                    : ApplicationProfileType.CLASS;
            }
    
            /// <summary>
            /// Translates an XML Schema data type URI into the matching application profile type.
            /// </summary>
            /// <param name="type">XSD data type.</param>
            /// <returns>
            /// <c>BOOLEAN</c>, <c>INTEGER</c> or <c>DATE</c> (for both date and dateTime) when the type
            /// is recognized; <c>STRING</c> for everything else.
            /// </returns>
            public ApplicationProfileType GetDataType(string type)
            {
                if (type == XmlSpecsHelper.XmlSchemaDataTypeBoolean)
                {
                    return ApplicationProfileType.BOOLEAN;
                }

                if (type == XmlSpecsHelper.XmlSchemaDataTypeInteger)
                {
                    return ApplicationProfileType.INTEGER;
                }

                var isDate = type == XmlSpecsHelper.XmlSchemaDataTypeDateTime
                    || type == XmlSpecsHelper.XmlSchemaDataTypeDate;
                return isDate ? ApplicationProfileType.DATE : ApplicationProfileType.STRING;
            }
    
            /// <summary>
            /// Reads the index version stored in the current-index graph.
            /// </summary>
            /// <remarks><c>First()</c> is called on the result, so a missing version is not handled here.</remarks>
            /// <returns>Number of the current index version.</returns>
            public int GetCurrentIndexVersion()
            {
                var row = _connector.QueryWithResultSet($@"SELECT ?version FROM <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> WHERE {{
                    <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> <{Uris.COSCINE_SEARCH_HAS_INDEX_VERSION}> ?version
                }}", false).First();

                var versionNode = (ILiteralNode)row.Value("version");
                return Convert.ToInt32(versionNode.Value);
            }
    
            /// <summary>
            /// Replaces the stored index version: the existing version triple is deleted from the
            /// current-index graph and a triple with the new version is inserted.
            /// </summary>
            /// <param name="newVersion">New index version.</param>
            public void SetCurrentIndexVersion(int newVersion)
            {
                var updateText = $@"DELETE WHERE {{
                    GRAPH <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> {{
                        <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> <{Uris.COSCINE_SEARCH_HAS_INDEX_VERSION}> ?version
                    }}
                }} 
                INSERT DATA {{
                    GRAPH <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> {{
                        <{Uris.COSCINE_SEARCH_CURRENT_INDEX}> <{Uris.COSCINE_SEARCH_HAS_INDEX_VERSION}> @literal
                    }}
                }}";

                _queryString.CommandText = updateText;
                _queryString.SetLiteral("literal", newVersion);
                _connector.Update(_queryString);
            }
    
            /// <summary>
            /// Builds the literal rules declared in a specific named graph.
            /// </summary>
            /// <remarks>
            /// For every SPARQL rule the <c>$this</c> placeholder of its construct query is replaced by
            /// the <c>@instance</c> parameter and the prefixes of the referenced prefix declaration are
            /// registered as namespaces. Adding a second rule for the same class fails because the
            /// class is the dictionary key.
            /// </remarks>
            /// <param name="graphName">Name of the graph.</param>
            /// <returns>A dictionary containing the classes (key) and corresponding literal rules (value) of the graph.</returns>
            public Dictionary<string, LiteralRule> ConstructLiteralRules(string graphName)
            {
                _queryString.CommandText = $@"SELECT ?class, ?construct, ?prefixName FROM @uri WHERE {{
                    ?class {Uris.SH_RULE} ?rule .
                    ?rule {Uris.SH_CONSTRUCT} ?construct . 
                    ?rule {Uris.SH_PREFIXES} ?prefixName .
                    ?rule a {Uris.SH_SPARQL_RULE} .
                    OPTIONAL {{
                        ?rule {Uris.SH_ORDER} ?order 
                    }}
                }} ORDER BY ?order";
                _queryString.SetUri("uri", new Uri(graphName));

                var rulesByClass = new Dictionary<string, LiteralRule>();
                foreach (var row in _connector.QueryWithResultSet(_queryString))
                {
                    var className = row.Value("class").ToString();

                    var constructText = row.Value("construct").ToString();
                    var ruleQuery = new SparqlParameterizedString();
                    ruleQuery.CommandText = constructText.Replace(PLACEHOLDER, $"@{LABEL_LITERAL_RULE}");

                    var prefixName = row.Value("prefixName").ToString();
                    foreach (var declaration in GetPrefixesOfGraph(graphName, prefixName))
                    {
                        ruleQuery.Namespaces.AddNamespace(declaration.Key, new Uri(declaration.Value));
                    }

                    rulesByClass.Add(className, new LiteralRule(_connector, ruleQuery, _language));
                }

                return rulesByClass;
            }
    
            /// <summary>
            /// Queries the prefix declarations (<c>sh:declare</c>) of one prefix rule inside a specific graph.
            /// </summary>
            /// <remarks>This method overwrites the shared query string of the client.</remarks>
            /// <param name="graphName">Name of the graph.</param>
            /// <param name="prefixName">Name of the prefix rule.</param>
            /// <returns>A dictionary containing the prefixes (keys) and namespaces (value) of a prefix rule.</returns>
            private IDictionary<string, string> GetPrefixesOfGraph(string graphName, string prefixName)
            {
                _queryString.CommandText = $@"SELECT ?prefix, STR(?namespace) AS ?namespace FROM @graph WHERE {{
                    @prefix {Uris.SH_DECLARE} ?prefixRule . 
                    ?prefixRule {Uris.SH_PREFIX} ?prefix . 
                    ?prefixRule {Uris.SH_NAMESPACE} ?namespace . 
                    @prefix a {Uris.OWL_ONTOLOGY} . 
                }}";
                _queryString.SetUri("graph", new Uri(graphName));
                _queryString.SetUri("prefix", new Uri(prefixName));

                var namespacesByPrefix = new Dictionary<string, string>();
                foreach (var row in _connector.QueryWithResultSet(_queryString))
                {
                    namespacesByPrefix.Add(row.Value("prefix").ToString(), row.Value("namespace").ToString());
                }

                return namespacesByPrefix;
            }
    
            /// <summary>
            /// Queries all additional rules of a specific named graph.
            /// </summary>
            /// <remarks>
            /// For every SPARQL rule the <c>$this</c> placeholder of its construct query is replaced by
            /// the <c>@graph</c> parameter and the prefixes of the referenced prefix declaration are
            /// registered as namespaces. The rules are built once, before this method returns, so
            /// enumerating the result several times neither repeats the prefix queries nor creates
            /// new rule instances.
            /// </remarks>
            /// <param name="graphName">Name of the graph.</param>
            /// <returns>The additional rules in the order delivered by the query (<c>ORDER BY ?order</c>).</returns>
            public IEnumerable<AdditionalRule> ConstructAdditionalRules(string graphName)
            {
                _queryString.CommandText = $@"SELECT ?construct, ?prefixName FROM @uri WHERE {{
                    @uri {Uris.SH_RULE} ?rule .
                    ?rule {Uris.SH_CONSTRUCT} ?construct . 
                    ?rule {Uris.SH_PREFIXES} ?prefixName .
                    ?rule a {Uris.SH_SPARQL_RULE} .
                    OPTIONAL {{
                        ?rule {Uris.SH_ORDER} ?order 
                    }}
                }} ORDER BY ?order";
                _queryString.SetUri("uri", new Uri(graphName));
                var results = _connector.QueryWithResultSet(_queryString);

                // ToList() forces the projection now: the lambda queries the store through
                // GetPrefixesOfGraph, which must not be repeated on every enumeration.
                return results.Select(x =>
                {
                    SparqlParameterizedString queryString = new SparqlParameterizedString();
                    queryString.CommandText = x.Value("construct").ToString().Replace(PLACEHOLDER, $"@{LABEL_ADDITIONAL_RULE}");

                    var prefixName = x.Value("prefixName").ToString();

                    foreach (var prefixResult in GetPrefixesOfGraph(graphName, prefixName))
                    {
                        queryString.Namespaces.AddNamespace(prefixResult.Key, new Uri(prefixResult.Value));
                    }
                    return new AdditionalRule(_connector, queryString);
                }).ToList();
            }
    
            /// <summary>
            /// Help function to guess a label based on the URI.
            /// </summary>
            /// <remarks>
            /// The text after the last <c>#</c> is used when it is not empty. Otherwise the last
            /// non-empty segment of the <c>/</c>-separated part before the fragment is used, so
            /// URIs that end in <c>#</c> or <c>/</c> still yield a usable label.
            /// </remarks>
            /// <param name="element">String representation of a URI; may be null or empty.</param>
            /// <returns>Guessed label, or <c>null</c> when no non-empty label can be derived.</returns>
            public string GuessLabel(string element)
            {
                Console.WriteLine($"Guess label for {element}");

                string label = null;
                if (!String.IsNullOrEmpty(element))
                {
                    var fragmentStart = element.LastIndexOf('#');
                    if (fragmentStart >= 0 && fragmentStart < element.Length - 1)
                    {
                        // Non-empty fragment: same result as taking the part after the last '#'.
                        return element.Substring(fragmentStart + 1);
                    }

                    // No fragment or an empty one (URI ends in '#'): fall back to the path.
                    var path = fragmentStart >= 0 ? element.Substring(0, fragmentStart) : element;
                    if (path.Contains("/"))
                    {
                        // Skips empty segments produced by trailing or doubled slashes.
                        label = path.Split('/').LastOrDefault(segment => segment.Length > 0);
                    }
                }

                if (label == null)
                {
                    Console.WriteLine($"No label could be guessed for {element}");
                }
                return label;
            }
    
            /// <summary>
            /// Loads a graph through the connector and hands back its triples.
            /// </summary>
            /// <param name="graphName">ID of the graph.</param>
            /// <returns>An enumerator of triples.</returns>
            public IEnumerable<Triple> GetTriplesFromGraph(string graphName)
            {
                return _connector.GetGraph(graphName).Triples;
            }
    
            /// <summary>
            /// Creates additional key value pairs for a metadata graph resulting through execution of the additional rules.
            /// </summary>
            /// <remarks>
            /// Additional rules can influence other existing metadata graphs. When
            /// <paramref name="changeOtherDocs"/> is <c>false</c>, only the entry of
            /// <paramref name="graphName"/> is converted, and only if the rules produced triples for it;
            /// triples that belong solely to other graphs lead to an empty result.
            /// </remarks>
            /// <param name="graphName">ID of metadata graph.</param>
            /// <param name="profile">Application profile of metadata graph.</param>
            /// <param name="indexMapper">The <c>ElasticsearchIndexMapper</c>.</param>
            /// <param name="changeOtherDocs">Flag which indicates if fields for influenced documents should be created.</param>
            /// <returns>A dictionary containing the IDs of the metadata graphs (key) and the corresponding JSON objects (value).</returns>
            public IDictionary<string, JObject> CreateFieldsFromAdditionalRule(string graphName, SpecificApplicationProfile profile, ElasticsearchIndexMapper indexMapper, bool changeOtherDocs = false)
            {
                IDictionary<string, JObject> jObjects = new Dictionary<string, JObject>();
                var documents = profile.GetAdditionalTriples(graphName);

                if (changeOtherDocs)
                {
                    foreach (var graph in documents.Keys)
                    {
                        jObjects.Add(graph, CreateFields(documents[graph], profile, indexMapper));
                    }
                }
                else if (documents.Keys.Contains(graphName))
                {
                    // Check for the key itself: a non-empty result may only contain other graphs.
                    jObjects.Add(graphName, CreateFields(documents[graphName], profile, indexMapper));
                }

                return jObjects;
            }
    
            /// <summary>
            /// Converts triples into one JSON object of key value pairs.
            /// </summary>
            /// <remarks>
            /// Type triples are skipped. A triple is also skipped, with a console message, when no
            /// label is found for its property or when the parser reports it as not indexable.
            /// </remarks>
            /// <param name="triples">An enumerator of triples which needs to be parsed.</param>
            /// <param name="profile">The application profile which the triples belong to.</param>
            /// <param name="indexMapper">The <c>ElasticsearchIndexMapper</c>.</param>
            /// <returns>The JSON object into which the parsed content of all indexable triples was merged.</returns>
            public JObject CreateFields(IEnumerable<Triple> triples, SpecificApplicationProfile profile, ElasticsearchIndexMapper indexMapper)
            {
                var fields = new JObject();
                foreach (var triple in triples)
                {
                    var predicate = triple.Predicate.ToString();
                    if (!String.Equals(predicate, Uris.RDF_TYPE_LONG))
                    {
                        var label = indexMapper.GetLabelOfProperty(predicate);
                        if (String.IsNullOrEmpty(label))
                        {
                            Console.WriteLine($"Property {predicate} could not be indexed because no label was found.");
                        }
                        else
                        {
                            try
                            {
                                fields.Merge(_dataTypeParser.Parse(label, triple.Object, indexMapper, profile));
                            }
                            catch (NotIndexableException e)
                            {
                                Console.WriteLine($"Property {predicate} could not be indexed. Reason: {e.Reason}");
                            }
                        }
                    }
                }
                return fields;
            }
        }
    }