diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..744a3291a56890e592d146cb26ccc12746932c50 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,79 @@ +# Base image: https://github.com/adoptium/containers/blob/main/17/jre/ubuntu/jammy/Dockerfile.releases.full +FROM eclipse-temurin:17-jre + +ARG FUSEKI_VERSION=5.0.0 +ARG FUSEKI_SHA256=1daf3f94883eacc597eb702e65ff0dad7fbdea341ee54a29f34fc37f85af40b9 + +ENV FUSEKI_HOME /fuseki +ENV FUSEKI_BASE /database +ENV CONFIG_TEMPLATES_DIR /config-templates +ENV ONTOLOGIES_DIR /ontologies +ENV SCRIPTS_DIR /scripts + +RUN \ + set -ex && \ +# install additional packages + apt-get update && \ + apt-get install -y tini cron gosu python3 && \ + rm -rf /var/lib/apt/lists/* && \ +# add user that is going to run fuseki + groupadd --gid=10001 fuseki && \ + useradd -g fuseki --uid 10000 --no-create-home fuseki + +# +# Download and install Fuseki +# +WORKDIR /tmp + +RUN \ + set -ex && \ + FUSEKI_TGZ=apache-jena-fuseki-$FUSEKI_VERSION.tar.gz && \ + wget --no-verbose https://repo1.maven.org/maven2/org/apache/jena/apache-jena-fuseki/$FUSEKI_VERSION/$FUSEKI_TGZ && \ + echo "$FUSEKI_SHA256 $FUSEKI_TGZ" | sha256sum --status -c && \ + tar -xzf $FUSEKI_TGZ && \ + rm $FUSEKI_TGZ && \ + mv apache-jena-fuseki-$FUSEKI_VERSION $FUSEKI_HOME + +# +# Download ontologies +# +WORKDIR $ONTOLOGIES_DIR + +RUN \ + set -ex && \ + # DFG Fachsystematik Ontology / DFG Classification of Subject Areas Ontology + # see https://github.com/tibonto/DFG-Fachsystematik-Ontology + wget --no-verbose https://raw.githubusercontent.com/tibonto/DFG-Fachsystematik-Ontology/b6e9983bc1eb33aeab034dd27f5f08cff227ff0f/dfgfo.ttl && \ + echo "0b168a90bc3fcb10457b2bd98b5969b2fd972725481bcc8aad149dd4f446be74 dfgfo.ttl" | sha256sum --status -c + +# +# Copy config templates +# +COPY config/ $CONFIG_TEMPLATES_DIR + +# +# Copy scripts +# +COPY scripts/docker-entrypoint.sh / +COPY scripts/load.sh scripts/textindex.sh scripts/compact_dalia.sh scripts/healthcheck.sh scripts/backup.py $SCRIPTS_DIR/ + +# +# Install compact_dalia.sh as daily job +# (Note: files executed by run-parts may not contain dots in their name) +# +RUN \ + set -ex && \ + ln -s $SCRIPTS_DIR/compact_dalia.sh /etc/cron.daily/compact_dalia + +HEALTHCHECK \ + --interval=5s \ + --timeout=2s \ + --start-period=10s \ + CMD $SCRIPTS_DIR/healthcheck.sh + +WORKDIR $FUSEKI_HOME +VOLUME $FUSEKI_BASE +EXPOSE 3030 + +ENTRYPOINT ["/usr/bin/tini", "--", "sh", "/docker-entrypoint.sh"] +CMD ["sh", "-c", "cron && gosu fuseki ./fuseki-server --verbose"] diff --git a/config/config.ttl b/config/config.ttl new file mode 100644 index 0000000000000000000000000000000000000000..7b9ac5bc7cce1139c385859f7250d20d5b7e0d7c --- /dev/null +++ b/config/config.ttl @@ -0,0 +1,30 @@ +# Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 + +## Fuseki Server configuration file. + +@prefix : <#> . +@prefix fuseki: <http://jena.apache.org/fuseki#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> . + +[] rdf:type fuseki:Server ; + # Example:: + # Server-wide query timeout. + # + # Timeout - server-wide default: milliseconds. + # Format 1: "1000" -- 1 second timeout + # Format 2: "10000,60000" -- 10s timeout to first result, + # then 60s timeout for the rest of query. + # + # See javadoc for ARQ.queryTimeout for details. + # This can also be set on a per dataset basis in the dataset assembler. 
+ # + # ja:context [ ja:cxtName "arq:queryTimeout" ; ja:cxtValue "30000" ] ; + + # Add any custom classes you want to load. + # Must have a "public static void init()" method. + # ja:loadClass "your.code.Class" ; + + # End triples. + . diff --git a/config/dalia.ttl b/config/dalia.ttl new file mode 100644 index 0000000000000000000000000000000000000000..f8b6700ae0c73fe7094a87636d0a68623a62a402 --- /dev/null +++ b/config/dalia.ttl @@ -0,0 +1,65 @@ +@prefix : <http://base/#> . +@prefix fuseki: <http://jena.apache.org/fuseki#> . +@prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix tdb2: <http://jena.apache.org/2016/tdb#> . + +:service_tdb_all + rdf:type fuseki:Service ; + fuseki:name "dalia" ; + rdfs:label "DALIA dataset" ; + fuseki:dataset :tdb2_dataset ; + + # endpoint /dalia/ + # SPARQL Query Language, SPARQL Update and SPARQL Graph Store HTTP Protocol (read+write) + fuseki:endpoint [ + fuseki:operation fuseki:query ; + ] ; + fuseki:endpoint [ + fuseki:operation fuseki:update ; + ] ; + fuseki:endpoint [ + fuseki:operation fuseki:gsp-rw ; + ] ; + + # endpoint /dalia/query + # SPARQL Query Language + fuseki:endpoint [ + fuseki:name "query" ; + fuseki:operation fuseki:query ; + ] ; + + # endpoint /dalia/sparql + # SPARQL Query Language + fuseki:endpoint [ + fuseki:name "sparql" ; + fuseki:operation fuseki:query ; + ] ; + + # endpoint /dalia/get + # SPARQL Graph Store HTTP Protocol (read) + fuseki:endpoint [ + fuseki:name "get" ; + fuseki:operation fuseki:gsp-r ; + ] ; + + # endpoint /dalia/update + # SPARQL Update + fuseki:endpoint [ + fuseki:name "update" ; + fuseki:operation fuseki:update ; + ] ; + + # endpoint /dalia/data + # SPARQL Graph Store HTTP Protocol (read+write) + fuseki:endpoint [ + fuseki:name "data" ; + fuseki:operation fuseki:gsp-rw ; + ] ; + . + +:tdb2_dataset + rdf:type tdb2:DatasetTDB2 ; + tdb2:location "/database/databases/dalia" + . diff --git a/config/ontologies.ttl b/config/ontologies.ttl new file mode 100644 index 0000000000000000000000000000000000000000..a02af24a2c035f955e7b94ba919768063afa1bee --- /dev/null +++ b/config/ontologies.ttl @@ -0,0 +1,81 @@ +@prefix : <http://base/#> . +@prefix fuseki: <http://jena.apache.org/fuseki#> . +@prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix tdb2: <http://jena.apache.org/2016/tdb#> . +@prefix text: <http://jena.apache.org/text#> . +@prefix skos: <http://www.w3.org/2004/02/skos/core#> . + +:service_tdb_all + rdf:type fuseki:Service ; + fuseki:name "ontologies" ; + rdfs:label "Ontologies dataset" ; + fuseki:dataset :text_dataset ; + + # endpoint /ontologies/ + # SPARQL Query Language and SPARQL Graph Store HTTP Protocol (read) + fuseki:endpoint [ + fuseki:operation fuseki:query ; + ] ; + fuseki:endpoint [ + fuseki:operation fuseki:gsp-r ; + ] ; + + # endpoint /ontologies/query + # SPARQL Query Language + fuseki:endpoint [ + fuseki:name "query" ; + fuseki:operation fuseki:query ; + ] ; + + # endpoint /ontologies/sparql + # SPARQL Query Language + fuseki:endpoint [ + fuseki:name "sparql" ; + fuseki:operation fuseki:query ; + ] ; + + # endpoint /ontologies/get + # SPARQL Graph Store HTTP Protocol (read) + fuseki:endpoint [ + fuseki:name "get" ; + fuseki:operation fuseki:gsp-r ; + ] ; + . 
+ +:text_dataset + rdf:type text:TextDataset ; + text:dataset :tdb2_dataset ; + text:index :indexLucene ; + . + +:tdb2_dataset + rdf:type tdb2:DatasetTDB2 ; + tdb2:location "/database/databases/ontologies" ; + . + +:indexLucene + rdf:type text:TextIndexLucene ; + text:directory <file:/database/databases/ontologies/lucene> ; + text:entityMap :entMap ; + . + +:entMap + rdf:type text:EntityMap ; + text:defaultField "rdfsLabel" ; + text:entityField "uri" ; + text:uidField "uid" ; + text:langField "lang" ; + text:graphField "graph" ; + text:map ( + [ + text:field "rdfsLabel" ; + text:predicate rdfs:label ; + ] + [ + text:field "skosAltLabel" ; + text:predicate skos:altLabel ; + ] + ) + . diff --git a/config/shiro.ini b/config/shiro.ini new file mode 100644 index 0000000000000000000000000000000000000000..6ce9d5de998d58357dcdc778263c342e75fec9f1 --- /dev/null +++ b/config/shiro.ini @@ -0,0 +1,39 @@ +# Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 + +[main] +# Development +ssl.enabled = false + +plainMatcher=org.apache.shiro.authc.credential.SimpleCredentialsMatcher +#iniRealm=org.apache.shiro.realm.text.IniRealm +iniRealm.credentialsMatcher = $plainMatcher + +localhostFilter=org.apache.jena.fuseki.authz.LocalhostFilter + +[users] +# Implicitly adds "iniRealm = org.apache.shiro.realm.text.IniRealm" +admin=pw + +[roles] + +[urls] +## Control functions open to anyone +/$/status = anon +/$/server = anon +/$/ping = anon +/$/metrics = anon + +## and the rest are restricted to localhost. +/$/** = localhostFilter + +## If you want simple, basic authentication user/password +## on the operations, +## 1 - set a better password in [users] above. +## 2 - comment out the "/$/** = localhost" line and use: +## "/$/** = authcBasic,user[admin]" + +## or to allow any access. +##/$/** = anon + +# Everything else +/**=anon diff --git a/scripts/backup.py b/scripts/backup.py new file mode 100755 index 0000000000000000000000000000000000000000..bfae979ddf8cb33af3600961075dd11dd094e896 --- /dev/null +++ b/scripts/backup.py @@ -0,0 +1,110 @@ +#!/usr/bin/python3 +# +# Trigger the backup of a dataset in Fuseki and wait for it to finish. +# +# Exit codes: +# 0 - backup successful +# 1 - any kind of error +# +import inspect +import json +import sys +import time +import urllib.parse +import urllib.request +from typing import Any +from urllib.error import HTTPError, URLError + + +# Settings for polling Fuseki's tasks endpoint +POLL_INTERVAL = 1 # interval in seconds between HTTP requests +MAX_POLLS = 100 # maximum number of HTTP requests + + +def print_to_stderr(s: str) -> None: + print(s, file=sys.stderr) + + +def print_help(script_path: str) -> None: + help_str = f""" + Trigger the backup of a dataset in Fuseki + Usage: {script_path} <dataset name> + """ + print_to_stderr(inspect.cleandoc(help_str)) + + +def parse_args(args: list[str]) -> str: + if len(args) < 2: + print_help(args[0]) + exit(1) + + return args[1] + + +def call_http(url: str, post_data: Any = None) -> Any: + post_data = None if post_data is None else urllib.parse.urlencode(post_data).encode("ascii") + + try: + response = urllib.request.urlopen(url, data=post_data) + except HTTPError as e: + print_to_stderr(f"Call to {url} ended with error code {e.code}.") + exit(1) + except URLError as e: + print_to_stderr(f"Connection to {url} failed. 
Reason: {e.reason}") + exit(1) + else: + return json.loads(response.read().decode(response.info().get_content_charset('utf-8'))) + + +# JSON returned by backup call: +# {'taskId': '1', 'requestId': 3} +def trigger_backup(dataset_name: str) -> str: + url = f"http://127.0.0.1:3030/$/backup/{dataset_name}" + backup_json = call_http(url, {}) + + key = "taskId" + if key not in backup_json: + print_to_stderr(f"Could not find key '{key}' in JSON '{backup_json}'.") + exit(1) + + return backup_json[key] + + +# Unfinished task JSON: +# {'task': 'Backup', 'taskId': '15', 'started': '2023-06-19T13:24:50.969+00:00'} +# +# Finished task JSON: +# {'task': 'Backup', 'taskId': '15', 'started': '2023-06-19T13:24:50.969+00:00', +# 'finished': '2023-06-19T13:24:50.974+00:00', 'success': True} +def poll_task(taskid: str) -> None: + url = f"http://127.0.0.1:3030/$/tasks/{taskid}" + + for _ in range(MAX_POLLS): + task_json = call_http(url) + + if "finished" in task_json: + success = False + + if "success" in task_json: + success = task_json["success"] + + if success: + exit(0) + else: + print_to_stderr("Backup was unsuccessful.") + exit(1) + + time.sleep(POLL_INTERVAL) + + print_to_stderr("MAX_POLLS reached, aborting.") + exit(1) + + +def main(args: list[str]) -> None: + dataset_name = parse_args(args) + taskid = trigger_backup(dataset_name) + poll_task(taskid) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/scripts/compact_dalia.sh b/scripts/compact_dalia.sh new file mode 100755 index 0000000000000000000000000000000000000000..42b46079cc7b9b4c56b42cc7c53b7c600da7afe2 --- /dev/null +++ b/scripts/compact_dalia.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# +# Compacts the DALIA dataset by HTTP-calling its compact endpoint. +# +curl -X POST http://127.0.0.1:3030/$/compact/dalia?deleteOld=true diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh new file mode 100755 index 0000000000000000000000000000000000000000..15521e8fa6ae0869eb34b89fd859208fe2226128 --- /dev/null +++ b/scripts/docker-entrypoint.sh @@ -0,0 +1,26 @@ +#!/bin/sh +set -ex + +# +# Create or replace database configuration files +# +mkdir -p $FUSEKI_BASE/configuration +cp $CONFIG_TEMPLATES_DIR/shiro.ini $FUSEKI_BASE +cp $CONFIG_TEMPLATES_DIR/config.ttl $FUSEKI_BASE +cp $CONFIG_TEMPLATES_DIR/dalia.ttl $FUSEKI_BASE/configuration +cp $CONFIG_TEMPLATES_DIR/ontologies.ttl $FUSEKI_BASE/configuration + +# +# (Re-)create ontologies dataset, insert RDF data from /ontologies and build a +# text index for it. +# +rm -Rf $FUSEKI_BASE/databases/ontologies +find /ontologies \( -iname "*.ttl" -o -iname "*.rdf" -o -iname "*.owl" \) -type f -exec $SCRIPTS_DIR/load.sh $FUSEKI_BASE/configuration/ontologies.ttl {} \; +$SCRIPTS_DIR/textindex.sh $FUSEKI_BASE/configuration/ontologies.ttl + +# +# Make sure the fuseki user owns $FUSEKI_BASE +# +chown -R fuseki:fuseki $FUSEKI_BASE + +exec "$@" diff --git a/scripts/healthcheck.sh b/scripts/healthcheck.sh new file mode 100755 index 0000000000000000000000000000000000000000..989ab358dc881ff1605064eb5c7f4574802d8dda --- /dev/null +++ b/scripts/healthcheck.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# Health check of Fuseki and cron +# + +# checks Fuseki's ping endpoint +check_fuseki() { + (curl --fail --silent http://127.0.0.1:3030/$/ping > /dev/null) + + # return exit code of curl call + return $? 
+} + +# checks if the cron daemon process exists +CRON_PID_FILE=/var/run/crond.pid +check_cron() { + if [ -s $CRON_PID_FILE ]; then + PID=$(cat $CRON_PID_FILE) + if [ -d /proc/$PID ]; then + # cron is running + return 0 + fi + fi + + # cron is not running + return 1 +} + +(check_fuseki && check_cron) || exit 1 diff --git a/scripts/load.sh b/scripts/load.sh new file mode 100755 index 0000000000000000000000000000000000000000..8529f184bf98c1cf441ec8ad5fa5c92c86539b53 --- /dev/null +++ b/scripts/load.sh @@ -0,0 +1,13 @@ +#!/bin/sh +if [ "$#" -lt 2 ]; then + echo "Loads RDF data files into a TDB2 dataset" >&2 + echo "Usage:" >&2 + echo "$0 <TDB2 assembler description> <file> [<file> ...]" >&2 + exit 1 +fi + +ASSEMBLER=$1 +shift +FILES="$@" + +java -cp $FUSEKI_HOME/fuseki-server.jar:/javalibs/* tdb2.tdbloader --desc=$ASSEMBLER $FILES diff --git a/scripts/textindex.sh b/scripts/textindex.sh new file mode 100755 index 0000000000000000000000000000000000000000..4685884b6156eaa24065e324ad2cd60997072daf --- /dev/null +++ b/scripts/textindex.sh @@ -0,0 +1,11 @@ +#!/bin/sh +if [ "$#" -lt 1 ]; then + echo "Build a text index for a TDB2 dataset" >&2 + echo "Usage:" >&2 + echo "$0 <TDB2 assembler description>" >&2 + exit 1 +fi + +ASSEMBLER=$1 + +java -cp $FUSEKI_HOME/fuseki-server.jar:/javalibs/* jena.textindexer --desc=$ASSEMBLER diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..582f083f6bd4a21d20b6ff27e39fa15d006a7bb7 --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,4 @@ +.env +.pytest_cache +__pycache__ +.idea \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..f9dd0f4a18347c8cacfe634d5117de299d6d98b3 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,15 @@ +import pytest +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs + + +@pytest.fixture(scope="class") +def fuseki(): + with DockerContainer("fuseki-dalia-prod").with_exposed_ports(3030) as fuseki: + wait_for_logs(fuseki, "INFO Server :: Started") + yield fuseki + + +@pytest.fixture(scope="class") +def fuseki_url(fuseki): + return f"http://{fuseki.get_container_host_ip()}:{fuseki.get_exposed_port(3030)}" diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..006b507e3907cd17ed3c3e42e033784072756e9a --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,4 @@ +rdflib==7.0.0 +pytest +testcontainers==4.3.3 +requests diff --git a/tests/test_dalia_dataset.py b/tests/test_dalia_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2295e34151ec66fb73f959cb253e764160f1c451 --- /dev/null +++ b/tests/test_dalia_dataset.py @@ -0,0 +1,38 @@ +import pytest + +from test_utils import add_data, can_use_gsp_read, cannot_use_sparql_update, cannot_use_gsp_write, \ + can_use_sparql_update, can_use_gsp_write, can_use_sparql_query + +DATASET_NAME = "dalia" + + +class TestDaliaDatasetRead: + @pytest.mark.parametrize("endpoint_name", ["", "query", "sparql"]) + def test_can_use_sparql_query(self, fuseki_url, endpoint_name): + add_data(f"{fuseki_url}/{DATASET_NAME}/update") + + assert can_use_sparql_query(fuseki_url, DATASET_NAME, endpoint_name) + + @pytest.mark.parametrize("endpoint_name", ["", "get", "data"]) + def test_can_use_gsp_get(self, fuseki_url, endpoint_name): + add_data(f"{fuseki_url}/{DATASET_NAME}/update") + + assert 
can_use_gsp_read(fuseki_url, DATASET_NAME, endpoint_name) + + @pytest.mark.parametrize("endpoint_name", ["query", "sparql", "get", "data"]) + def test_endpoints_are_not_writeable_with_sparql_update(self, fuseki_url, endpoint_name): + assert cannot_use_sparql_update(fuseki_url, DATASET_NAME, endpoint_name) + + @pytest.mark.parametrize("endpoint_name", ["query", "sparql", "get", "update"]) + def test_endpoints_are_not_writeable_with_gsp(self, fuseki_url, endpoint_name): + assert cannot_use_gsp_write(fuseki_url, DATASET_NAME, endpoint_name) + + +@pytest.mark.parametrize("endpoint_name", ["", "update"]) +def test_endpoints_are_writeable_with_sparql_update(fuseki_url, endpoint_name): + assert can_use_sparql_update(fuseki_url, DATASET_NAME, endpoint_name) + + +@pytest.mark.parametrize("endpoint_name", ["", "data"]) +def test_endpoints_are_writeable_with_gsp(fuseki_url, endpoint_name): + assert can_use_gsp_write(fuseki_url, DATASET_NAME, endpoint_name) diff --git a/tests/test_ontologies_dataset.py b/tests/test_ontologies_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..4907d986b9b62e5522e3030c80076a8e52f3251d --- /dev/null +++ b/tests/test_ontologies_dataset.py @@ -0,0 +1,79 @@ +import pytest +from rdflib.plugins.stores.sparqlstore import SPARQLStore + +from test_utils import can_use_sparql_query, cannot_use_gsp_write, \ + cannot_use_sparql_update, can_use_gsp_read + +DATASET_NAME = "ontologies" + + +class TestOntologiesDataset: + def test_text_search(self, fuseki_url): + endpoint = f"{fuseki_url}/{DATASET_NAME}" + store = SPARQLStore(query_endpoint=endpoint) + + result = store.query(""" + PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + PREFIX text: <http://jena.apache.org/text#> + + SELECT ?uri ?score ?label + WHERE { + (?uri ?score) text:query (rdfs:label 'ancient history') . + ?uri rdfs:label ?label . + } + ORDER BY DESC(?score) + """) + assert len(result) == 14 + assert list(map(lambda item: str(item.label), list(result))) == [ + 'Ancient History', + 'Ancient Cultures', + 'History', + 'Art History', + 'Medieval History', + 'Egyptology and Ancient Near Eastern Studies', + 'Early Modern History', + 'History of Philosophy', + 'History of Science', + 'Economic and Social History', + 'Modern and Current History', + 'General Education and History of Education', + 'Art History, Music, Theatre and Media Studies', + 'Architecture, Building and Construction History, Construction Research, Sustainable Building Technology' + ] + + result = store.query(""" + PREFIX skos: <http://www.w3.org/2004/02/skos/core#> + PREFIX text: <http://jena.apache.org/text#> + + SELECT ?uri ?score ?label + WHERE { + (?uri ?score) text:query (skos:altLabel 'Alte Geschichte') . + ?uri skos:altLabel ?label . + } + ORDER BY DESC(?score) + """) + assert len(result) == 6 + assert list(map(lambda item: str(item.label), list(result))) == [ + 'Alte Geschichte', + 'Alte Kulturen', + 'Frühneuzeitliche Geschichte', + 'Mittelalterliche Geschichte', + 'Geschichte der Philosophie', + 'Neuere und Neueste Geschichte (einschl. 
Europäische Geschichte der Neuzeit und Außereuropäische Geschichte)' + ] + + @pytest.mark.parametrize("endpoint_name", ["", "query", "sparql", "get"]) + def test_dataset_is_not_writeable_with_gsp(self, fuseki_url, endpoint_name): + assert cannot_use_gsp_write(fuseki_url, DATASET_NAME, endpoint_name) + + @pytest.mark.parametrize("endpoint_name", ["", "query", "sparql", "get"]) + def test_dataset_is_not_writeable_with_sparql_update(self, fuseki_url, endpoint_name): + assert cannot_use_sparql_update(fuseki_url, DATASET_NAME, endpoint_name) + + @pytest.mark.parametrize("endpoint_name", ["", "query", "sparql"]) + def test_can_use_sparql_query(self, fuseki_url, endpoint_name): + assert can_use_sparql_query(fuseki_url, DATASET_NAME, endpoint_name) + + @pytest.mark.parametrize("endpoint_name", ["", "get"]) + def test_can_use_gsp_get(self, fuseki_url, endpoint_name): + assert can_use_gsp_read(fuseki_url, DATASET_NAME, endpoint_name) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e4843ecffbf5eeef7876c1161bfd7e7694d5897d --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,112 @@ +from urllib.error import HTTPError + +import pytest +import requests +from rdflib import Graph +from rdflib.plugins.stores.sparqlstore import SPARQLStore, SPARQLUpdateStore + + +def add_data(update_endpoint): + store = SPARQLUpdateStore(update_endpoint=update_endpoint) + store.update(SPARQL_UPDATE_QUERY) + + +def is_http_error_code(code): + return 400 <= code < 500 + + +SPARQL_SPO_QUERY = """ + SELECT ?s ?p ?o + WHERE { + ?s ?p ?o . + } + LIMIT 1 +""" + +SPARQL_UPDATE_QUERY = """ + PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + INSERT DATA { + <http://example.com/book/42> rdfs:label "Hitchhiker's Guide" . + } +""" + +PAYLOAD_TTL = """ + @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . + <http://example.com/book/42> rdfs:label "Hitchhiker's Guide" . 
+""" + + +def can_use_sparql_query(fuseki_url, dataset_name, endpoint_name): + endpoint = f"{fuseki_url}/{dataset_name}/{endpoint_name}" + store = SPARQLStore(query_endpoint=endpoint) + + result = store.query(SPARQL_SPO_QUERY) + return len(result) == 1 + + +def can_use_sparql_update(fuseki_url, dataset_name, endpoint_name): + update_endpoint = f"{fuseki_url}/{dataset_name}/{endpoint_name}" + read_endpoint = f"{fuseki_url}/{dataset_name}/" + store = SPARQLUpdateStore(query_endpoint=read_endpoint, update_endpoint=update_endpoint) + + store.update(SPARQL_UPDATE_QUERY) + + assert len(store.query(SPARQL_SPO_QUERY)) == 1 + return True + + +def cannot_use_sparql_update(fuseki_url, dataset_name, endpoint_name): + endpoint = f"{fuseki_url}/{dataset_name}/{endpoint_name}" + store = SPARQLUpdateStore(update_endpoint=endpoint) + + with pytest.raises(HTTPError) as exc_info: + store.update(SPARQL_UPDATE_QUERY) + assert is_http_error_code(exc_info.value.status) + return True + + +def can_use_gsp_read(fuseki_url, dataset_name, endpoint_name): + endpoint = f"{fuseki_url}/{dataset_name}/{endpoint_name}?default" + headers = {"Accept": "text/turtle"} + + r = requests.get(endpoint, headers=headers) + assert r.status_code == 200 + assert r.headers["content-type"] == "text/turtle" + assert len(r.content) > 0 + + g = Graph().parse(data=r.content) + assert len(g) > 0 + return True + + +def can_use_gsp_write(fuseki_url, dataset_name, endpoint_name): + endpoint = f"{fuseki_url}/{dataset_name}/{endpoint_name}?default" + headers = {"Content-Type": "text/turtle"} + + r = requests.post(endpoint, data=PAYLOAD_TTL, headers=headers) + assert r.status_code == 200 + + r = requests.put(endpoint, data=PAYLOAD_TTL, headers=headers) + assert r.status_code == 200 + + r = requests.delete(endpoint) + assert r.status_code == 204 + + return True + + +def cannot_use_gsp_write(fuseki_url, dataset_name, endpoint_name): + endpoint = f"{fuseki_url}/{dataset_name}/{endpoint_name}?default" + headers = {"Content-Type": "text/turtle"} + + # RDFLib does not support the Graph Store HTTP Protocol (GSP), thus we use plain HTTP requests. + r = requests.post(endpoint, data=PAYLOAD_TTL, headers=headers) + assert is_http_error_code(r.status_code) + + r = requests.put(endpoint, data=PAYLOAD_TTL, headers=headers) + assert is_http_error_code(r.status_code) + + r = requests.delete(endpoint) + assert is_http_error_code(r.status_code) + + return True