Skip to content
Snippets Groups Projects
Commit ccf11b6f authored by Frank Lange's avatar Frank Lange
Browse files

add endpoint /curation/suggest/languages

parent 1bc7327e
No related branches found
No related tags found
No related merge requests found
Pipeline #1628368 passed
from typing import List
from rdflib import BNode, Graph, Literal, Variable
from rdflib.collection import Collection
from project.dalia.api_models.api_models import (
CurationSuggestPaginatedResult,
CurationSuggestSearchRequest,
LabelValueItem,
)
from project.dalia.query.utils import query_ontologies_dataset
from project.dalia.query_builder.query_builder import (
Aggregates,
BIND,
FILTER,
FunctionExpressions,
OPTIONAL,
Operators,
QueryBuilder,
)
from project.dalia.rdf.namespace import Jena_text, SKOS_last_call, lvont
# data for endpoint /curation/suggest/languages
def get_languages_suggestions(request: CurationSuggestSearchRequest) -> CurationSuggestPaginatedResult:
    """Build the paginated language-suggestion result for a curation search request.

    Appends a wildcard to the search term so the text index matches prefixes,
    then combines the total hit count with one page of labelled results.
    """
    wildcard_query = request.q + "*"
    return CurationSuggestPaginatedResult(
        count=count_results_from_languages_search(wildcard_query),
        offset=request.offset,
        limit=request.limit,
        results=_search_languages_and_retrieve_labels(wildcard_query, request.limit, request.offset)
    )
# SPARQL variables shared between the search and the count query:
# ?lang is the language IRI, ?label its English name.
_VARIABLES = {name: Variable(name) for name in ("lang", "label")}
def _where_for_text_search(query: str, var_lang: Variable, var_score: Variable, var_label: Variable):
    """Return the WHERE clauses for a Jena full-text search over skos:prefLabel.

    Produces the pattern
        ( ?lang ?score ?label ) text:query ( skos:prefLabel "<query>" )
    and restricts matches to English labels.
    """
    subjects = Collection(Graph(), BNode(), [var_lang, var_score, var_label])
    objects = Collection(Graph(), BNode(), [SKOS_last_call.prefLabel, Literal(query)])
    text_search_triple = (subjects, Jena_text.query, objects)
    english_labels_only = FILTER(
        Operators.EQ(FunctionExpressions.LANG(var_label), Literal("en"))
    )
    return (text_search_triple, english_labels_only)
def prepare_query_for_language_search_and_label_retrieval(query: str, limit: int, offset: int) -> str:
    """Build the SPARQL query that searches languages and retrieves their labels.

    Results are ordered by descending text-search score; ties are broken so
    that languages carrying an ISO 639-1 code come first.
    """
    lang = _VARIABLES["lang"]
    label = _VARIABLES["label"]
    score = Variable("score")
    iso_code = Variable("iso639P1Code")
    bound_iso_code = Variable("boundIso639P1Code")
    builder = QueryBuilder().SELECT(
        *_VARIABLES.values()
    ).WHERE(
        *_where_for_text_search(query, lang, score, label),
        OPTIONAL(
            (lang, lvont.iso639P1Code, iso_code),
        ),
        # In case of identical text search scores we would like to show languages with ISO639-1 code first, but
        # ?iso639P1Code might be unbound. This causes problems, because in the ORDER BY clause unbound values come first
        # (see https://www.w3.org/TR/sparql11-query/#modOrderBy).
        # The workaround is to bind ?boundIso639P1Code to the boolean "false" in case ?iso639P1Code is unbound. In
        # Apache Jena's SPARQL engine strings have a lower order than booleans in the ORDER BY clause.
        BIND(FunctionExpressions.COALESCE(iso_code, Literal(False)), bound_iso_code),
    ).ORDER_BY(
        FunctionExpressions.DESC(score),
        bound_iso_code,
    )
    return builder.LIMIT(limit).OFFSET(offset).build()
def prepare_query_for_count_in_language_search(query: str) -> str:
    """Build the SPARQL query counting all languages matched by the text search."""
    lang = _VARIABLES["lang"]
    label = _VARIABLES["label"]
    where_clauses = _where_for_text_search(query, lang, Variable("score"), label)
    return QueryBuilder().SELECT(
        count=Aggregates.COUNT(lang),
    ).WHERE(*where_clauses).build()
def _search_languages_and_retrieve_labels(query: str, limit: int, offset: int) -> List[LabelValueItem]:
    """Run the language text search and map each result row to a LabelValueItem."""
    sparql = prepare_query_for_language_search_and_label_retrieval(query, limit, offset)
    return [
        _process_result_from_metadata_retrieval(row)
        for row in query_ontologies_dataset(sparql)
    ]
def _process_result_from_metadata_retrieval(result) -> LabelValueItem:
    """Convert one SPARQL result row (?lang, ?label bindings) into a LabelValueItem."""
    return LabelValueItem(value=str(result.lang), label=str(result.label))
def count_results_from_languages_search(query: str) -> int:
    """Return the total number of languages matching the text search *query*.

    The COUNT aggregate query yields exactly one row with a single ?count
    binding, which is converted to a native Python int.
    """
    sparql_query = prepare_query_for_count_in_language_search(query)
    results = query_ontologies_dataset(sparql_query)
    # next(iter(...)) replaces the non-idiomatic direct __iter__() dunder call.
    return next(iter(results)).get("count").toPython()
"""
Terms from the Lexvo.org Ontology (http://lexvo.org/ontology)
"""
from rdflib import URIRef
NS = "http://lexvo.org/ontology#"
# Properties
iso639P1Code = URIRef(NS + "iso639P1Code")
......@@ -14,6 +14,7 @@ urlpatterns = [
path('v1/items/<uuid:resource_id>/suggestions', views.ItemSuggestionsView.as_view(), name="item_suggestions"),
path('v1/curation/suggest/communities', views.CurationSuggestCommunitiesView.as_view(), name="curation_suggest_communities"),
path('v1/curation/suggest/learning-resource-types', views.CurationSuggestLearningResourceTypesView.as_view(), name="curation_suggest_learning_resource_types"),
path('v1/curation/suggest/languages', views.CurationSuggestLanguagesView.as_view(), name="curation_suggest_languages"),
path('v1/curation/suggest/disciplines', views.CurationSuggestDisciplinesView.as_view(), name="curation_suggest_disciplines"),
path('v1/curation/suggest/licenses', views.CurationSuggestLicensesView.as_view(), name="curation_suggest_licenses"),
path('v1/curation/suggest/proficiency-levels', views.CurationSuggestProficiencyLevelsView.as_view(), name="curation_suggest_proficiency_levels"),
......
......@@ -8,6 +8,7 @@ from rest_framework.views import APIView
from project.dalia.api_models.api_models import ItemSearchResult
from project.dalia.curation.suggest.communities import get_communities_suggestions
from project.dalia.curation.suggest.disciplines import get_disciplines_suggestions
from project.dalia.curation.suggest.languages import get_languages_suggestions
from project.dalia.curation.suggest.learning_resource_types import get_learning_resource_types_suggestions
from project.dalia.curation.suggest.media_types import get_media_types_suggestions
from project.dalia.curation.suggest.proficiency_levels import get_proficiency_levels_suggestions
......@@ -114,6 +115,18 @@ class CurationSuggestLearningResourceTypesView(APIView):
return Response(serializer.data)
# endpoint /curation/suggest/languages
class CurationSuggestLanguagesView(APIView):
    """Suggest languages for curation via a paginated text search."""

    def get(self, request: Request):
        # Validate the search/pagination parameters from the query string.
        params = serializers.CurationSuggestSearchRequestSerializer(data=request.query_params)
        params.is_valid(raise_exception=True)
        suggestions = get_languages_suggestions(params.validated_data)
        result_serializer = serializers.CurationSuggestPaginatedResultSerializer(suggestions)
        return Response(result_serializer.data)
# endpoint /curation/suggest/disciplines
class CurationSuggestDisciplinesView(APIView):
def get(self, request: Request):
......
import pytest
from django.urls import reverse
from rest_framework import status
from rest_framework.exceptions import ErrorDetail
from project.dalia.api_models.api_models import (
CurationSuggestPaginatedResult,
CurationSuggestSearchRequest,
LabelValueItem,
)
from project.dalia.curation.suggest.languages import (
count_results_from_languages_search,
get_languages_suggestions,
prepare_query_for_count_in_language_search,
prepare_query_for_language_search_and_label_retrieval,
)
from project.dalia.serializers import CurationSuggestPaginatedResultSerializer
from tests.project.dalia.utils import dedent_and_normalize, normalize
def test_prepare_query_for_language_search_and_label_retrieval():
    """The generated search SPARQL must match the expected query verbatim."""
    sparql = prepare_query_for_language_search_and_label_retrieval(query="fr*", limit=10, offset=42)
    expected = dedent_and_normalize("""
        SELECT ?lang ?label
        WHERE {
        ( ?lang ?score ?label ) <http://jena.apache.org/text#query> ( <http://www.w3.org/2008/05/skos#prefLabel> "fr*" ) .
        FILTER ( LANG ( ?label ) = "en" ) .
        OPTIONAL {
        ?lang <http://lexvo.org/ontology#iso639P1Code> ?iso639P1Code .
        } .
        BIND ( COALESCE ( ?iso639P1Code, "false"^^<http://www.w3.org/2001/XMLSchema#boolean> ) AS ?boundIso639P1Code ) .
        }
        ORDER BY DESC ( ?score ) ?boundIso639P1Code
        LIMIT 10
        OFFSET 42
    """)
    assert normalize(sparql) == expected
def test_prepare_query_for_count_in_language_search():
    """The generated count SPARQL must match the expected query verbatim."""
    sparql = prepare_query_for_count_in_language_search(query="fr*")
    expected = dedent_and_normalize("""
        SELECT (COUNT( ?lang ) as ?count)
        WHERE {
        ( ?lang ?score ?label ) <http://jena.apache.org/text#query> ( <http://www.w3.org/2008/05/skos#prefLabel> "fr*" ) .
        FILTER ( LANG ( ?label ) = "en" ) .
        }
    """)
    assert normalize(sparql) == expected
def test_get_languages_suggestions(triplestore):
    """Paging metadata is echoed back and results arrive in ranked order."""
    request = CurationSuggestSearchRequest(q="en", limit=4, offset=13)

    result = get_languages_suggestions(request)

    assert result.count == 38
    assert result.limit == 4
    assert result.offset == 13
    assert [item.value for item in result.results] == [
        "http://lexvo.org/id/iso639-3/ptt",
        "http://lexvo.org/id/iso639-3/enu",
        "http://lexvo.org/id/iso639-3/enw",
        "http://lexvo.org/id/iso639-3/env",
    ]
    assert [item.label for item in result.results] == [
        "Enrekang",
        "Enu",
        "Enwan (Akwa Ibom State)",
        "Enwan (Edu State)",
    ]
@pytest.mark.parametrize(
    "query, expected_count",
    [
        ("*", 7771),
        ("germ*", 11),
        ("KLIN*", 1),
    ]
)
def test_count_results_from_languages_search(triplestore, query, expected_count):
    """Counting supports wildcards and is case-insensitive."""
    actual = count_results_from_languages_search(query)
    assert actual == expected_count
@pytest.mark.parametrize(
    "request_data, expected_response_data",
    [
        # default q matches everything; first page of two
        (
            {"limit": 2},
            CurationSuggestPaginatedResult(
                count=7771,
                offset=0,
                limit=2,
                results=[
                    LabelValueItem(label="Afar", value="http://lexvo.org/id/iso639-3/aar"),
                    LabelValueItem(label="Abkhazian", value="http://lexvo.org/id/iso639-3/abk"),
                ]
            )
        ),
        # single exact-ish match
        (
            {"q": "Klin"},
            CurationSuggestPaginatedResult(
                count=1,
                offset=0,
                limit=10,
                results=[
                    LabelValueItem(label="Klingon", value="http://lexvo.org/id/iso639-3/tlh"),
                ]
            )
        ),
        # prefix search with custom limit
        (
            {"q": "fr", "limit": 4},
            CurationSuggestPaginatedResult(
                count=26,
                offset=0,
                limit=4,
                results=[
                    LabelValueItem(label="French", value="http://lexvo.org/id/iso639-3/fra"),
                    LabelValueItem(label="Western Frisian", value="http://lexvo.org/id/iso639-3/fry"),
                    LabelValueItem(label="Cajun French", value="http://lexvo.org/id/iso639-3/frc"),
                    LabelValueItem(label="Eastern Frisian", value="http://lexvo.org/id/iso639-3/frs"),
                ]
            )
        ),
        # offset into the result list
        (
            {"q": "fre", "limit": 1, "offset": 10},
            CurationSuggestPaginatedResult(
                count=14,
                offset=10,
                limit=1,
                results=[
                    LabelValueItem(label="Saint Lucian Creole French", value="http://lexvo.org/id/iso639-3/acf"),
                ]
            )
        ),
        # no matches at all
        (
            {"q": "abc", "limit": 100},
            CurationSuggestPaginatedResult(
                count=0,
                offset=0,
                limit=100,
                results=[]
            )
        ),
    ]
)
def test_get_on_CurationSuggestLanguagesView_returns_200_and_valid_data(
    triplestore, api_client, request_data, expected_response_data
):
    """GET returns 200 and a payload that round-trips through the result serializer."""
    response = api_client.get(reverse("curation_suggest_languages"), data=request_data)

    assert response.status_code == status.HTTP_200_OK
    serializer = CurationSuggestPaginatedResultSerializer(data=response.data)
    assert serializer.is_valid()
    assert serializer.validated_data == expected_response_data
@pytest.mark.parametrize(
    "request_data, expected_error_response",
    [
        # limit below the minimum of 1
        (
            {"limit": 0},
            {
                'limit': [ErrorDetail(string='Ensure this value is greater than or equal to 1.', code='min_value')],
            },
        ),
        # negative offset
        (
            {"offset": -1},
            {
                'offset': [ErrorDetail(string='Ensure this value is greater than or equal to 0.', code='min_value')],
            },
        ),
        # both parameters invalid at once
        (
            {"limit": 0, "offset": -1},
            {
                'limit': [ErrorDetail(string='Ensure this value is greater than or equal to 1.', code='min_value')],
                'offset': [ErrorDetail(string='Ensure this value is greater than or equal to 0.', code='min_value')],
            },
        ),
    ]
)
def test_get_on_CurationSuggestLanguagesView_returns_400_for_invalid_request_data(
    api_client, request_data, expected_error_response
):
    """Invalid pagination parameters yield 400 with per-field error details."""
    response = api_client.get(reverse("curation_suggest_languages"), data=request_data)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert response.data == expected_error_response
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment