Skip to content
Snippets Groups Projects
Commit b32ce2a5 authored by Lars Pieschel's avatar Lars Pieschel
Browse files

Complete rework

parents
No related branches found
No related tags found
No related merge requests found
__pycache__
.vscode
jupyter_rdfify.egg-info
\ No newline at end of file
Copyright 2020 RWTH Aachen i5
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
# Jupyter-RDFify
TODO: Write Readme
\ No newline at end of file
__version__ = '1.0.0'
from IPython.display import display_javascript
from .jupyter_rdf import JupyterRDF
from .serialization import SerializationModule
from .sparql import SPARQLModule
from .shex import ShexModule
from .graph_manager import GraphManagerModule
def load_ipython_extension(ipython):
    """Set up the extension when IPython loads it.

    Emits the JavaScript that enables cell syntax highlighting, pushes four
    empty ``rdf*`` dictionaries into the IPython namespace, registers all
    default modules on a fresh :class:`JupyterRDF` instance and finally
    registers that instance as a magic.
    """
    # Highlighting rules for sparql, turtle and json-ld cells in the classic notebook.
    # JupyterLab does not define the global IPython object, hence the guard in the script.
    js_highlight = """
if (typeof IPython !== "undefined") {
IPython.CodeCell.options_default.highlight_modes['application/sparql-query'] = {'reg':[/^%%rdf sparql/]};
IPython.CodeCell.options_default.highlight_modes['text/turtle'] = {'reg':[/^%%rdf turtle/, /^%%rdf shex/]};
IPython.CodeCell.options_default.highlight_modes['application/ld+json'] = {'reg':[/^%%rdf json-ld/]};
IPython.notebook.get_cells().map(function(cell){ if (cell.cell_type == 'code'){ cell.auto_highlight(); } });
}
"""
    display_javascript(js_highlight, raw=True)

    namespace_keys = ("rdfgraphs", "rdfsources", "rdfresults", "rdfshapes")
    ipython.push({key: dict() for key in namespace_keys}, True)

    # (module class, sub-command name, description, display name), in registration order.
    default_modules = (
        (SerializationModule, "turtle", "Turtle module", "Turtle"),
        (SerializationModule, "n3", "Notation 3 module", "N3"),
        (SerializationModule, "json-ld", "JSON-LD module", "JSON-LD"),
        (SerializationModule, "xml", "XML+RDF module", "XML+RDF"),
        (SPARQLModule, "sparql", "SPARQL module", "SPARQL"),
        (ShexModule, "shex", "ShEx module", "ShEx"),
        (GraphManagerModule, "graph", "Graph management module", "Graphman"),
    )
    jupyter_rdf = JupyterRDF(ipython)
    for module_class, name, description, displayname in default_modules:
        jupyter_rdf.register_module(
            module_class, name, description, displayname)
    ipython.register_magics(jupyter_rdf)
from graphviz import Digraph
import rdflib
from .util import literal_to_string, StopCellExecution
def draw_graph(g, logger, shorten_uris=True, rename_blank_nodes=True):
    """Render graph ``g`` as a graphviz ``Digraph`` and hand it to ``logger.out``.

    Args:
        g: Graph providing ``namespace_manager``, ``all_nodes()`` and triple iteration.
        logger: Object whose ``out`` method receives the finished ``Digraph``.
        shorten_uris: When true, labels are produced with ``n3(namespace_manager)``
            instead of the plain ``n3()`` form.
        rename_blank_nodes: When true, blank nodes are labelled ``_:bn0``, ``_:bn1``, ...
            in the order they are encountered.
    """
    manager = g.namespace_manager
    dot = Digraph()

    def label_of(term):
        # Single place deciding between the namespace-aware and the plain n3 form.
        return term.n3(manager) if shorten_uris else term.n3()

    node_ids = {}
    blank_count = 0
    for index, node in enumerate(g.all_nodes()):
        node_id = str(index)
        if isinstance(node, rdflib.term.URIRef):
            dot.node(node_id, label=label_of(node))
        elif isinstance(node, rdflib.term.BNode):
            if rename_blank_nodes:
                text = f"_:bn{blank_count}"
                blank_count += 1
            else:
                text = node.n3()
            dot.node(node_id, label=text)
        elif isinstance(node, rdflib.term.Literal):
            # Literals are drawn as boxes.
            dot.node(node_id, label=label_of(node), shape="box")
        else:
            continue
        # Edges look their endpoints up by plain n3 form.
        node_ids[node.n3()] = node_id

    for subj, pred, obj in g:
        dot.edge(node_ids[subj.n3()], node_ids[obj.n3()], label=label_of(pred))
    logger.out(dot)
def parse_graph(string, logger, fmt="xml"):
    """Parse ``string`` into a new ``rdflib.Graph`` using format ``fmt``.

    Returns the value returned by ``Graph.parse``. On any exception the error
    is reported through ``logger.print`` and ``StopCellExecution`` is raised.
    """
    try:
        parsed = rdflib.Graph().parse(data=string, format=fmt)
    except Exception as err:
        logger.print(f"Could not parse {fmt} graph:<br>{str(err)}")
        raise StopCellExecution
    else:
        return parsed
from owlrl import DeductiveClosure, RDFS_Semantics, OWLRL_Semantics
from .graph import draw_graph
from .rdf_module import RDFModule
class GraphManagerModule(RDFModule):
    """Module operating on the labelled graphs in ``store["rdfgraphs"]``.

    Supported actions: list all labels, remove or draw a graph, and expand a
    graph in place with an owlrl deductive closure.
    """

    def __init__(self, name, parser, logger, description, displayname):
        super().__init__(name, parser, logger, description, displayname)
        actions = ["list", "remove", "draw",
                   "entail-rdfs", "entail-owl", "entail-rdfs+owl"]
        self.parser.add_argument(
            "action", choices=actions, help="Action to perform")
        self.parser.add_argument(
            "--label", "-l", help="Reference a local graph by label")

    def check_label(self, label, store):
        """Return True when ``label`` names a stored graph.

        Otherwise a message is logged and the method returns None.
        """
        if label is None:
            self.log(
                "Please specify the label of a graph with parameter --label or -l.")
            return None
        if label not in store["rdfgraphs"]:
            self.log(f"Graph labelled '{label}' not found.")
            return None
        return True

    def handle(self, params, store):
        """Run the action selected in ``params.action`` against ``store``."""
        action = params.action
        if action is None:
            return

        if action == "list":
            items = "".join(
                f"<li>{label}</li>" for label in store["rdfgraphs"])
            self.logger.display_html(
                "The following labelled graphs are present:<br><ul>" + items + "</ul>")
            return

        # action -> (semantics class, extra DeductiveClosure kwargs, regime text for the log)
        entailments = {
            "entail-rdfs": (RDFS_Semantics, {}, "the RDFS regime"),
            "entail-owl": (OWLRL_Semantics, {}, "the OWL-RL regime"),
            "entail-rdfs+owl": (OWLRL_Semantics, {"rdfs_closure": True},
                                "the RDFS regime and then the OWL-RL regime"),
        }
        if action not in ("draw", "remove") and action not in entailments:
            return
        # Every remaining action needs an existing graph label.
        if not self.check_label(params.label, store):
            return

        graphs = store["rdfgraphs"]
        if action == "draw":
            draw_graph(graphs[params.label], self.logger)
        elif action == "remove":
            del graphs[params.label]
            self.log(f"Graph labelled '{params.label}' has been removed.")
        else:
            semantics, options, regime = entailments[action]
            DeductiveClosure(semantics, **options).expand(graphs[params.label])
            self.log(
                f"Graph labelled '{params.label}' has been entailed using {regime}.")
from IPython.core.magic import (
Magics, magics_class, line_cell_magic, needs_local_scope)
from shlex import split
from .rdf_module import RDFModule
from .log import RDFLogger
from .util import MagicParser
@magics_class
class JupyterRDF(Magics):
    """IPython magics class providing the ``%rdf`` / ``%%rdf`` magic.

    Holds the top-level argument parser, the sub-parsers that registered
    modules attach to, a shared logger and the store dictionary that is
    passed to every module handler.
    """

    def __init__(self, shell):
        super(JupyterRDF, self).__init__(shell)
        self.parser = MagicParser("%rdf")
        # Fallback used when no sub-command is given. rdf() invokes every
        # handler as func(args, store), so the fallback must accept both
        # positional arguments (a one-argument lambda raised TypeError here).
        self.parser.set_defaults(func=lambda *_: (
            self.logger.out("Usage: %rdf --help")))
        self.parser.add_argument(
            "--verbose", "-v", help="Enable verbose output", action="store_true")
        self.parser.add_argument(
            "--return-store", "-r", help="Returns a copy of all present elements (graphs, schemas, etc.)", action="store_true")
        self.subparsers = self.parser.add_subparsers(help="RDF modules")
        self.submodules = list()
        self.logger = RDFLogger()
        self.store = {
            "rdfgraphs": dict(),
            "rdfsources": dict(),
            "rdfresults": dict(),
            "rdfshapes": dict()
        }

    def register_module(self, module_class, name, description="", displayname=None):
        """Instantiate ``module_class`` (an ``RDFModule`` subclass) and keep it in ``submodules``.

        The module receives the shared sub-parsers object and logger.
        """
        assert issubclass(module_class, RDFModule)
        self.submodules.append(module_class(
            name, self.subparsers, self.logger, description, displayname))

    @line_cell_magic
    def rdf(self, line, cell=None):
        """Parse ``line``, then dispatch to the selected handler with the cell body.

        With ``--return-store`` the store dictionary itself is returned and no
        handler runs. Any exception is reported through the logger instead of
        propagating.
        """
        try:
            args = self.parser.parse_args(split(line))
            self.logger.set_verbose(args.verbose)
            if args.return_store:
                return self.store
            # Handlers read the cell body from the parsed namespace.
            args.cell = cell
            return args.func(args, self.store)
        except Exception as e:
            self.logger.print(str(e))
from IPython.display import display, HTML
class RDFLogger:
    """Output helper that either prints or displays messages.

    Messages flagged as verbose are dropped unless verbose mode is enabled.
    """

    def __init__(self):
        self.verbose = False

    def set_verbose(self, verbose=True):
        """Switch verbose mode on or off."""
        self.verbose = verbose

    def print(self, msg, verbose=False):
        """Emit ``msg`` through the builtin ``print``."""
        self.out(msg, verbose=verbose, _print=True)

    def display_html(self, html):
        """Wrap ``html`` in an ``HTML`` object and display it."""
        self.out(HTML(html))

    def out(self, msg, verbose=False, _print=False):
        """Emit ``msg`` via ``print`` when ``_print`` is true, else via ``display``.

        Nothing is emitted for a verbose message while verbose mode is off.
        """
        suppressed = verbose and not self.verbose
        if suppressed:
            return
        # Inside a method body ``print`` is the builtin, not the method above.
        emit = print if _print else display
        emit(msg)
from abc import ABC, abstractmethod
class RDFModule(ABC):
    """Abstract base class for ``%rdf`` sub-command modules.

    On construction a module adds its own sub-parser (named after the
    lower-cased module name) to ``parser`` and installs ``handle`` as that
    sub-parser's default ``func``.
    """

    def __init__(self, name, parser, logger, description="", displayname=None):
        self.name = name
        self.logger = logger
        # The display name falls back to the module name.
        self.displayname = name if displayname is None else displayname
        self.parser = parser.add_parser(name.lower(), help=description)
        self.parser.set_defaults(func=self.handle)

    @abstractmethod
    def handle(self, params, store=None):
        """Process a parsed invocation; must be implemented by subclasses."""
        raise NotImplementedError()

    def log(self, msg, verbose=False):
        """Print ``msg`` through the logger, prefixed with the display name."""
        text = f"{self.displayname}: {msg}"
        self.logger.out(text, verbose, True)
import rdflib
from IPython.display import display_pretty
from owlrl import DeductiveClosure, RDFS_Semantics, OWLRL_Semantics
from .rdf_module import RDFModule
from .graph import parse_graph, draw_graph
from .table import graph_spo_iterator, html_table
from .util import strip_comments
# Accepted values for --display and --serialize.
displays = ["graph", "table", "raw", "none"]
formats = ["turtle", "json-ld", "xml"]


class SerializationModule(RDFModule):
    """Module that parses the cell body as an RDF serialization.

    The module name is passed to ``parse_graph`` as the format. Parsed graphs
    are stored under the key ``"last"`` and, when given, under a label.
    """

    def __init__(self, name, parser, logger, description, displayname):
        super().__init__(name, parser, logger, description, displayname)
        add_argument = self.parser.add_argument
        add_argument(
            "--serialize", "-s", choices=formats, default="turtle", help="Format for serializing when display is set to raw.")
        add_argument(
            "--display", "-d", choices=displays, default="graph", help="How output is displayed")
        add_argument(
            "--label", "-l", help="Store graph locally with this label")
        add_argument(
            "--prefix", "-p", help="Define a prefix which gets prepend to every query. Useful for PREFIX declarations", action="store_true")
        add_argument(
            "--entail", "-e", choices=["rdfs", "owl", "rdfs+owl"], help="Uses a brute force implementation of the finite version of RDFS semantics or OWL 2 RL. Uses owlrl python package.")
        self.prefix = ""

    def handle(self, params, store):
        """Store the cell as prefix, or parse, store, entail and display it."""
        cell = params.cell
        if cell is None:
            return

        if params.prefix:
            self.prefix = cell + "\n"
            self.log("Stored prefix.")
            return

        graphs, sources = store["rdfgraphs"], store["rdfsources"]
        try:
            stripped = strip_comments(cell)
            g = parse_graph(self.prefix + stripped, self.logger, self.name)
        except Exception as e:
            # A failed parse clears the "last" graph but still records the source.
            self.log(f"Parse failed:\n{str(e)}")
            graphs["last"] = None
            sources["last"] = self.prefix + cell
            return

        g.source = lambda: self.name
        label = params.label
        if label is not None:
            graphs[label] = g
            sources[label] = self.prefix + cell
        graphs["last"] = g
        sources["last"] = self.prefix + cell

        # --entail value -> (semantics class, extra DeductiveClosure kwargs)
        regimes = {
            "rdfs": (RDFS_Semantics, {}),
            "owl": (OWLRL_Semantics, {}),
            "rdfs+owl": (OWLRL_Semantics, {"rdfs_closure": True}),
        }
        if params.entail is not None and params.entail in regimes:
            semantics, options = regimes[params.entail]
            DeductiveClosure(semantics, **options).expand(g)

        mode = params.display
        if mode == "none":
            return
        if mode == "graph":
            draw_graph(g, self.logger)
        elif mode == "table":
            self.logger.display_html(html_table(graph_spo_iterator(g)))
        else:
            serialized = g.serialize(format=params.serialize, encoding="utf-8")
            display_pretty(serialized.decode("utf-8"), raw=True)
from pyshex.utils.schema_loader import SchemaLoader
from pyshex import ShExEvaluator
from .rdf_module import RDFModule
class ShexModule(RDFModule):
    """ShEx module: stores a schema prefix, parses shape schemas and validates
    labelled graphs against labelled shapes.
    """

    def __init__(self, name, parser, logger, description, displayname):
        super().__init__(name, parser, logger, description, displayname)
        self.parser.add_argument(
            "action", choices=["parse", "validate", "prefix"], help="Action to perform")
        self.parser.add_argument(
            "--label", "-l", help="Shape label for referencing")
        self.parser.add_argument(
            "--graph", "-g", help="Graph label for validation")
        self.parser.add_argument(
            "--focus", "-f", help="URI of node to focus on"
        )
        self.parser.add_argument(
            "--start", "-s", help="Starting shape"
        )
        self.loader = SchemaLoader()
        self.evaluator = ShExEvaluator()
        self.prefix = ""

    def print_result(self, result):
        """Report one evaluation result (shape, focus node, pass/fail and reason)."""
        self.log(f"Evaluating shape '{result.start}' on node '{result.focus}'")
        if result.result:
            self.logger.print("PASSED!")
        else:
            self.logger.print(f"FAILED! Reason:\n{result.reason}\n")

    def handle(self, params, store):
        """Run the ``prefix``, ``parse`` or ``validate`` action.

        ``prefix`` stores the cell body to be prepended to later schemas,
        ``parse`` loads the cell as a schema and stores it under ``--label``,
        ``validate`` evaluates a stored graph against a stored shape.
        """
        if params.action == "prefix":
            # Line-magic usage has no cell body; report it instead of failing
            # on None + str, mirroring the check in the parse branch.
            if params.cell is None:
                self.log("No cell content to store as prefix.")
                return
            self.prefix = params.cell + "\n"
            self.log("Stored Prefix.")
        elif params.action == "parse":
            if params.cell is not None:
                try:
                    schema = self.loader.loads(self.prefix + params.cell)
                    if params.label is not None and schema is not None:
                        store["rdfshapes"][params.label] = schema
                    self.log("Shape successfully parsed.")
                except Exception as e:
                    self.log(f"Error during shape parse:\n{str(e)}")
            else:
                self.log("No cell content to parse.")
        elif params.action == "validate":
            if params.label is not None and params.graph is not None:
                if params.label in store["rdfshapes"]:
                    if params.graph in store["rdfgraphs"]:
                        result = self.evaluator.evaluate(
                            store["rdfgraphs"][params.graph],
                            store["rdfshapes"][params.label],
                            start=params.start,
                            focus=params.focus
                        )
                        for r in result:
                            self.print_result(r)
                    else:
                        self.log(
                            f"Found no graph with label '{params.graph}'.")
                else:
                    self.log(f"Found no shape with label '{params.label}'.")
            else:
                self.log("A shape and a graph label are required for validation.")
from IPython.display import display, display_pretty
from SPARQLWrapper import SPARQLWrapper
from cgi import parse_header
from .rdf_module import RDFModule
from .graph import parse_graph, draw_graph
from .table import display_table, html_table
# Accepted values for --format and --display.
formats = ["xml", "json"]
displays = ["graph", "table", "raw", "none"]
# Response mime type -> display methods (besides raw) that can render it.
mime_types = {
    "application/sparql-results+xml": ["table"],
    "application/rdf+xml": ["table", "graph"],
    "application/xml": [],
    "application/sparql-results+json": ["table"],
}


class SPARQLModule(RDFModule):
    """SPARQL module: sends the cell body as a query to a remote endpoint or
    runs it against a locally stored graph, then displays and stores the result.
    """

    def __init__(self, name, parser, logger, description, displayname):
        super().__init__(name, parser, logger, description, displayname)
        self.parser.add_argument("--endpoint", "-e", help="SPARQL endpoint")
        self.parser.add_argument(
            "--format", "-f", choices=formats, help="Requested format for query result", default="xml")
        self.parser.add_argument(
            "--display", "-d", choices=displays, help="How output is displayed. Does not work for local queries.", default="table")
        grp = self.parser.add_mutually_exclusive_group()
        grp.add_argument(
            "--prefix", "-p", help="Define a prefix which gets prepend to every query. Useful for PREFIX declarations", action="store_true")
        grp.add_argument(
            "--local", "-l", help="Give a label of a local graph. This cell will then ignore the endpoint and query the graph instead")
        self.parser.add_argument(
            "--store", "-s", help="Store result of the query with this label")
        self.prefix = ""
        self.wrapper = None

    def query(self, query, params):
        """Send ``query`` to the endpoint and display the response.

        A new wrapper is created when ``--endpoint`` is given; otherwise the
        wrapper from an earlier call is reused. Returns the wrapper's query
        result, or None when no endpoint is set or an error occurred.
        """
        if params.endpoint is not None:
            self.wrapper = SPARQLWrapper(params.endpoint)
        if self.wrapper is not None:
            self.wrapper.setQuery(self.prefix + query)
            self.wrapper.setReturnFormat(params.format)
            try:
                result = self.wrapper.query()
                if result._get_responseFormat() != params.format:
                    self.log(
                        f"""
The server responded with a format different from the requested format.\n
Either the server does not support the requested format or the query resulted in an incompatible type.\n
Requested: '{params.format}', Response: '{result._get_responseFormat()}'
""")
                content_type = parse_header(result.info()["content-type"])
                body = result.response.read()
                self.display_response(body, content_type[0], params.display)
                return result
            except Exception as e:
                self.log(f"Error during query:\n{str(e)}")
        else:
            self.log("Endpoint not set. Use --endpoint parameter.")

    def queryLocal(self, query, graph):
        """Run ``query`` on ``graph`` and display the result by result type.

        SELECT results are shown as a table, ASK answers are printed and
        CONSTRUCT graphs are drawn. Returns the query result, or None on error.
        """
        try:
            res = graph.query(query)
            if res.type == "SELECT":
                self.logger.display_html(
                    html_table(select_result_row_iter(res)))
            elif res.type == "ASK":
                self.logger.print(res.askAnswer)
            elif res.type == "CONSTRUCT":
                draw_graph(res.graph, self.logger)
            return res
        except Exception as e:
            self.log(f"Error during local query:\n{str(e)}")

    def display_response(self, body, mime, method):
        """Display the response ``body`` of type ``mime`` using ``method``.

        Falls back to raw display when the mime type is unknown or cannot be
        rendered with the requested method.
        """
        if method == "none":
            return
        # First settle the effective method, then render. Previously the raw
        # fallback for unknown mime types sat in another branch of the same
        # if/elif chain and was never reached, so nothing was displayed.
        if mime not in mime_types:
            self.log(
                f"Mime type '{mime}' not supported. Defaulting to raw display.")
            method = "raw"
        elif method not in mime_types[mime] and method != "raw":
            self.log(
                f"Incompatible display option '{method}' for mime type '{mime}'. Defaulting to raw display.")
            method = "raw"

        if method == "graph":
            g = parse_graph(body, self.logger)
            draw_graph(g, self.logger)
        elif method == "table":
            display_table(body, mime, self.logger)
        else:
            self.logger.print(body.decode("utf-8"))

    def handle(self, params, store):
        """Store the cell as prefix, or run it as a local or remote query.

        The result and the cell source are stored under ``"last"`` and, when
        ``--store`` is given, under that label as well.
        """
        if params.cell is not None:
            if params.prefix:
                self.prefix = params.cell + "\n"
                self.log("Stored prefix.")
            elif params.local is not None:
                if params.local in store["rdfgraphs"]:
                    res = self.queryLocal(
                        self.prefix + params.cell, store["rdfgraphs"][params.local])
                    if params.store is not None:
                        store["rdfresults"][params.store] = res
                        store["rdfsources"][params.store] = params.cell
                    store["rdfresults"]["last"] = res
                    store["rdfsources"]["last"] = params.cell
                else:
                    self.log(f"Graph labelled '{params.local}' not found.")
            else:
                res = self.query(self.prefix + params.cell, params)
                if params.store is not None:
                    store["rdfresults"][params.store] = res
                    store["rdfsources"][params.store] = params.cell
                store["rdfresults"]["last"] = res
                store["rdfsources"]["last"] = params.cell
def select_result_row_iter(result):
    """Yield a header row with the n3 form of each result variable, then every result row."""
    yield [variable.n3() for variable in result.vars]
    yield from result
from IPython.display import HTML
import xml.etree.ElementTree as ET
from .graph import parse_graph
import json
def display_table(body, mime, logger):
    """Render a query response as an HTML table through ``logger.display_html``.

    The row source is chosen by ``mime``: SPARQL XML results, SPARQL JSON
    results or an RDF/XML graph. For any other mime type a message is printed
    through the logger and nothing is displayed.
    """
    if mime == "application/sparql-results+xml":
        rows = xml_row_iterator(ET.fromstring(body))
    elif mime == "application/sparql-results+json":
        rows = json_row_iterator(json.loads(body))
    elif mime == "application/rdf+xml":
        rows = graph_spo_iterator(parse_graph(body, logger))
    else:
        logger.print("Could not display table")
        return
    logger.display_html(html_table(rows))
#
def xml_row_iterator(elem):
    """Iterate a SPARQL XML result (http://www.w3.org/2005/sparql-results#) by rows.

    The first yielded item is the list of column headers (variable names).
    Every following row has exactly one cell per header, in header order;
    variables that are unbound in a result yield an empty string so that
    values never shift into the wrong column.

    Args:
        elem: Root ``<sparql>`` element of the result document.

    Yields:
        Lists of strings: the header row, then one row per ``<result>``.
    """
    sparql_ns = "http://www.w3.org/2005/sparql-results#"
    # An explicit prefix is used because the empty-prefix (default namespace)
    # form of the namespace map is only understood by Python 3.8+.
    ns = {"sr": sparql_ns}
    literal_tag = "{%s}literal" % sparql_ns
    uri_tag = "{%s}uri" % sparql_ns
    bnode_tag = "{%s}bnode" % sparql_ns

    headers = [head.attrib["name"]
               for head in elem.findall("sr:head/sr:variable", ns)]
    yield headers

    for result in elem.findall("sr:results/sr:result", ns):
        cells = {}
        for binding in result.findall("sr:binding", ns):
            n = binding[0]
            if n.tag == literal_tag:
                lang = n.get("{http://www.w3.org/XML/1998/namespace}lang")
                datatype = n.get("datatype")
                # An empty literal element has text None.
                cell = n.text or ""
                if lang is not None:
                    cell += "@{}".format(lang)
                if datatype is not None:
                    cell += "^^{}".format(datatype)
            elif n.tag == uri_tag:
                cell = "&lt;{}&gt;".format(n.text)
            elif n.tag == bnode_tag:
                cell = "&lt;_:{}&gt;".format(n.text)
            else:
                cell = "Unknown node: {}".format(ET.tostring(n))
            # Key by variable name so the row can be laid out in header order.
            cells[binding.get("name")] = cell
        yield [cells.get(header, "") for header in headers]
def json_row_iterator(obj):
    """Iterate a SPARQL JSON result object by rows.

    The first yielded item is the list of variable names. Every following row
    has exactly one cell per variable; unbound variables yield an empty string.

    Args:
        obj: Decoded JSON result with ``head.vars`` and ``results.bindings``.

    Yields:
        Lists of strings: the header row, then one row per binding set.
    """
    headers = obj["head"]["vars"]
    yield headers
    for binding in obj["results"]["bindings"]:
        row = []
        for header in headers:
            if header not in binding:
                row.append("")
                continue
            val = binding[header]
            kind = val["type"]
            if kind == "uri":
                row.append(f'&lt;{val["value"]}&gt;')
            elif kind == "bnode":
                row.append(f'&lt;_:{val["value"]}&gt;')
            elif kind in ("literal", "typed-literal"):
                # "typed-literal" is treated like "literal" so such bindings
                # are no longer silently dropped.
                suff = ""
                if "xml:lang" in val:
                    suff += f'@{val["xml:lang"]}'
                if "datatype" in val:
                    suff += f'^^{val["datatype"]}'
                row.append(val["value"] + suff)
            else:
                # Unknown term type: still emit a cell so the row stays
                # aligned with the headers.
                row.append(str(val.get("value", "")))
        yield row
def graph_spo_iterator(graph):
    """Yield a subject/predicate/object header row, then each triple of ``graph`` as a list."""
    yield ["subject", "predicate", "object"]
    yield from ([subj, pred, obj] for subj, pred, obj in graph)
def html_table(row_iter):
    """Build an HTML table from an iterable of rows.

    The first row is rendered with header cells, all following rows with
    data cells. Cell values are inserted as-is (callers provide pre-escaped
    markup). An empty iterable yields an empty table.

    Args:
        row_iter: Iterable (typically a generator) of rows, each an iterable of cells.

    Returns:
        The complete ``<table>...</table>`` markup as a string.
    """
    rows = iter(row_iter)
    header = next(rows, None)
    if header is None:
        return "<table></table>"
    parts = ["<table>", html_table_row(header, True)]
    parts.extend(html_table_row(row) for row in rows)
    # The closing tag was previously missing, producing unbalanced markup.
    parts.append("</table>")
    return "".join(parts)


def html_table_row(row, header=False):
    """Return one ``<tr>`` element containing a cell for every item in ``row``."""
    cells = "".join(html_table_cell(cell, header) for cell in row)
    return "<tr>" + cells + "</tr>"


def html_table_cell(cell, header=False):
    """Return ``cell`` wrapped in ``<th>`` when ``header`` is true, else in ``<td>``."""
    tag = "th" if header else "td"
    return f"<{tag}>{cell}</{tag}>"
import argparse
import re
class StopCellExecution(Exception):
    """Exception used to abort the running cell without a visible error."""

    def _render_traceback_(self):
        # Intentionally renders nothing.
        return None
class MagicParser(argparse.ArgumentParser):
    """Argument parser that raises ``StopCellExecution`` where argparse would exit."""

    def exit(self, status=0, message=None):
        """Print the message for a non-zero status, then raise ``StopCellExecution``."""
        if status:
            print(f"Parser exited with error: {message}")
        raise StopCellExecution

    def error(self, message):
        """Print the error message and stop the cell through ``exit``."""
        print(f"Error: {message}")
        self.exit()
def literal_to_string(literal, lang=None, datatype=None):
    """Return ``literal`` in double quotes, followed by ``@lang`` and ``^^datatype`` when given."""
    pieces = [f'"{literal}"']
    if lang is not None:
        pieces.append(f"@{lang}")
    if datatype is not None:
        pieces.append(f"^^{datatype}")
    return "".join(pieces)
def strip_comments(text):
    """Special comment strip function for formats which do not support comments (e.g. json).

    Removes everything from ``###`` to the end of the line, on every line of ``text``.
    """
    # flags is passed by keyword: positional count/flags arguments to re.sub
    # are deprecated as of Python 3.13.
    return re.sub(r"###.*$", "", text, flags=re.MULTILINE)
setup.py 0 → 100644
import setuptools

# Read the long description explicitly as UTF-8 so the build does not depend
# on the platform's default text encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="jupyter-rdfify",
    version="1.0.0",
    author="Lars Pieschel",
    author_email="lars.pieschel@rwth-aachen.de",
    description="IPython Extension for semantic web technology support (Turtle, SPARQL, ShEx, etc.)",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://git.rwth-aachen.de/i5/teaching/jupyter-rdfify",
    packages=setuptools.find_packages(),
    install_requires=[
        "rdflib",
        "rdflib-jsonld",
        "ipython>=7.18.0",
        "graphviz",
        "sparqlwrapper",
        "requests",
        "owlrl",
        "PyShEx"
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Framework :: IPython",
    ],
    python_requires='>=3.6',
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment