Skip to content
Snippets Groups Projects
Commit e121990c authored by Frank Lange
Browse files

add endpoint /suggest/communities

parent b4003b53
No related branches found
No related tags found
No related merge requests found
Pipeline #1526681 passed
......@@ -230,6 +230,11 @@ class CurationSuggestResultItem:
value: str
# Paginated payload returned by suggest lookups such as the communities search.
# NOTE(review): count / offset / limit are presumably declared on PaginatedResult
# (they are passed as keyword arguments by callers) - confirm against the base class.
@dataclass
class CurationSuggestPaginatedResult(PaginatedResult):
    # Suggestion items of the requested page; the default is None, not an empty list.
    results: List[CurationSuggestResultItem] = None
@dataclass
class CurationSuggestDisciplinesResultItem(CurationSuggestResultItem):
children: List[CurationSuggestDisciplinesResultItem]
......
from typing import List
from rdflib import BNode, DCTERMS, Graph, Literal, RDF, Variable
from rdflib.collection import Collection
from project.dalia.api_models.api_models import (
CurationSuggestPaginatedResult,
CurationSuggestResultItem,
CurationSuggestSearchRequest,
)
from project.dalia.query.utils import query_dalia_dataset
from project.dalia.query_builder.query_builder import Aggregates, FunctionExpressions, QueryBuilder
from project.dalia.rdf.namespace import Jena_text, MoDalia
# data for endpoint /suggest/communities
def get_communities_suggestions(request: CurationSuggestSearchRequest) -> CurationSuggestPaginatedResult:
    """Build one page of community suggestions for a search request.

    The search text ``request.q`` is wrapped in ``*`` on both sides, the number
    of matching communities is counted, and the page selected by
    ``request.limit`` / ``request.offset`` is retrieved together with titles.

    :param request: search text and paging parameters (``q``, ``limit``, ``offset``).
    :return: total count, the echoed paging parameters and the page of results.
    """
    # NOTE(review): request.q goes into the text-index query string unescaped;
    # confirm whether query-syntax characters in user input need handling.
    wildcard_query = "*" + request.q + "*"
    page_size = request.limit
    page_start = request.offset

    # Count first, then fetch the page (two separate dataset queries).
    total_matches = count_results_from_community_search(wildcard_query)
    page_items = _search_communities_and_retrieve_titles(wildcard_query, page_size, page_start)

    return CurationSuggestPaginatedResult(
        count=total_matches,
        offset=page_start,
        limit=page_size,
        results=page_items,
    )
# SPARQL variables projected by the community search query, keyed by their name.
# The insertion order (community, title) is the order used in the SELECT clause.
_VARIABLES = {name: Variable(name) for name in ("community", "title")}
def _where_for_text_search(query: str, var_community: Variable, var_score: Variable):
    """Return the WHERE patterns used by both the search and the count query.

    The first pattern is a ``text:query`` lookup on ``dcterms:title`` that binds
    the matching resource and its score; the second restricts the matches to
    resources of type ``MoDalia.Community``.

    :param query: text-index query string, placed into the pattern as a literal.
    :param var_community: variable bound to the matching community.
    :param var_score: variable bound to the match score.
    :return: tuple containing the two triple patterns.
    """
    # Both sides of text:query are RDF lists: (?community ?score) and (dcterms:title "query").
    match_and_score = Collection(Graph(), BNode(), [var_community, var_score])
    property_and_text = Collection(Graph(), BNode(), [DCTERMS.title, Literal(query)])

    return (
        (match_and_score, Jena_text.query, property_and_text),
        (var_community, RDF.type, MoDalia.Community),
    )
def prepare_query_for_community_search_and_title_retrieval(query: str, limit: int, offset: int) -> str:
    """Build the SPARQL query that returns one page of communities with titles.

    An inner sub-select performs the text search, orders the distinct matches
    by descending score and applies LIMIT / OFFSET; the outer query then joins
    the ``dcterms:title`` of every community selected for the page.

    :param query: text-index query string.
    :param limit: maximum number of communities in the page.
    :param offset: number of ranked communities to skip.
    :return: the built query.
    """
    community = _VARIABLES["community"]
    score = Variable("score")

    outer_select = QueryBuilder().SELECT(*_VARIABLES.values())

    # Paging is done inside the sub-select, i.e. on communities rather than on title rows.
    ranked_page = (
        QueryBuilder()
        .SELECT(community, distinct=True)
        .WHERE(*_where_for_text_search(query, community, score))
        .ORDER_BY(FunctionExpressions.DESC(score))
        .LIMIT(limit)
        .OFFSET(offset)
        .build()
    )

    title_pattern = (community, DCTERMS.title, _VARIABLES["title"])
    return outer_select.WHERE(ranked_page, title_pattern).build()
def prepare_query_for_count_in_community_search(query: str) -> str:
    """Build the SPARQL query that counts the distinct communities matching ``query``.

    The WHERE clause is the same text search used for the paged retrieval, so
    the count refers to the same set of matches.

    :param query: text-index query string.
    :return: the built query; the aggregate is projected as ``count``.
    """
    community = _VARIABLES["community"]
    score = Variable("score")

    builder = QueryBuilder()
    distinct_communities = Aggregates("COUNT", community, ["DISTINCT"])
    counting_select = builder.SELECT(count=distinct_communities)

    patterns = _where_for_text_search(query, community, score)
    return counting_select.WHERE(*patterns).build()
def _search_communities_and_retrieve_titles(query: str, limit: int, offset: int) -> List[CurationSuggestResultItem]:
    """Run the paged community search and convert each result row into an item.

    :param query: text-index query string.
    :param limit: maximum number of communities in the page.
    :param offset: number of ranked communities to skip.
    :return: one ``CurationSuggestResultItem`` per returned row.
    """
    rows = query_dalia_dataset(
        prepare_query_for_community_search_and_title_retrieval(query, limit, offset)
    )
    return list(map(_process_result_from_metadata_retrieval, rows))
def _process_result_from_metadata_retrieval(result) -> CurationSuggestResultItem:
    """Convert one query result row into a suggestion item.

    :param result: row exposing ``community`` and ``title`` attributes.
    :return: item whose ``value`` is the stringified community and whose
        ``label`` is the stringified title.
    """
    community_id = str(result.community)
    title = str(result.title)
    return CurationSuggestResultItem(value=community_id, label=title)
def count_results_from_community_search(query: str) -> int:
    """Return the number of distinct communities matching ``query``.

    :param query: text-index query string.
    :return: value of the ``count`` binding in the first result row,
        converted to a Python object.
    """
    rows = query_dalia_dataset(prepare_query_for_count_in_community_search(query))
    # Only the first row is read; it carries the aggregate under "count".
    first_row = next(iter(rows))
    return first_row.get("count").toPython()
......@@ -28,6 +28,16 @@ class ItemSearchRequestSerializer(DataclassSerializer):
dataclass = api_models.ItemSearchRequest
# Serializer for suggest search requests; CurationSuggestCommunitiesView uses it
# to validate the query parameters of a GET request.
class CurationSuggestSearchRequestSerializer(DataclassSerializer):
    class Meta:
        # Dataclass this serializer is bound to.
        dataclass = api_models.CurationSuggestSearchRequest
# Serializer for paginated suggest results; CurationSuggestCommunitiesView uses it
# to render the response body.
class CurationSuggestPaginatedResultSerializer(DataclassSerializer):
    class Meta:
        # Dataclass this serializer is bound to.
        dataclass = api_models.CurationSuggestPaginatedResult
class CurationSuggestDisciplinesResultItemSerializer(DataclassSerializer):
class Meta:
dataclass = api_models.CurationSuggestDisciplinesResultItem
......
......@@ -12,6 +12,7 @@ urlpatterns = [
path('v1/items/<uuid:resource_id>', views.ItemView.as_view(), name="dalia_item"),
path('v1/items', views.ItemSearchView.as_view(), name="dalia_item_search"),
path('v1/items/<uuid:resource_id>/suggestions', views.ItemSuggestionsView.as_view(), name="item_suggestions"),
path('v1/suggest/communities', views.CurationSuggestCommunitiesView.as_view(), name="curation_suggest_communities"),
path('v1/suggest/disciplines', views.CurationSuggestDisciplinesView.as_view(), name="curation_suggest_disciplines"),
path('v1/suggest/licenses', views.CurationSuggestLicensesView.as_view(), name="curation_suggest_licenses"),
]
......@@ -6,6 +6,7 @@ from rest_framework.response import Response
from rest_framework.views import APIView
from project.dalia.api_models.api_models import ItemSearchResult
from project.dalia.curation.suggest.communities import get_communities_suggestions
from project.dalia.curation.suggest.disciplines import get_disciplines_suggestions
from project.dalia.curation.suggest.licenses import get_licenses_suggestions
from project.dalia.query.communities.communities import get_metadata_for_community
......@@ -87,6 +88,18 @@ class ItemSuggestionsView(APIView):
return Response(serializer.data)
# endpoint /suggest/communities
class CurationSuggestCommunitiesView(APIView):
    # GET: validate the query parameters, look up community suggestions and
    # return them as a serialized paginated result.
    def get(self, request: Request):
        query_serializer = serializers.CurationSuggestSearchRequestSerializer(data=request.query_params)
        # Invalid parameters raise here instead of reaching the search.
        query_serializer.is_valid(raise_exception=True)

        suggestions = get_communities_suggestions(query_serializer.validated_data)
        response_serializer = serializers.CurationSuggestPaginatedResultSerializer(suggestions)
        return Response(response_serializer.data)
# endpoint /suggest/disciplines
class CurationSuggestDisciplinesView(APIView):
def get(self, request: Request):
......
import pytest
from django.urls import reverse
from rest_framework import status
from rest_framework.exceptions import ErrorDetail
from project.dalia.api_models.api_models import (
CurationSuggestPaginatedResult,
CurationSuggestResultItem,
CurationSuggestSearchRequest,
)
from project.dalia.curation.suggest.communities import (
count_results_from_community_search, get_communities_suggestions,
prepare_query_for_community_search_and_title_retrieval,
prepare_query_for_count_in_community_search,
)
from project.dalia.serializers import CurationSuggestPaginatedResultSerializer
from tests.project.dalia.utils import dedent_and_normalize, normalize
def test_prepare_query_for_community_search_and_title_retrieval():
    # The paged search must nest the ranked, limited text search in a
    # sub-select and join the titles in the outer query.
    sparql = prepare_query_for_community_search_and_title_retrieval(query="abc", limit=20, offset=5)

    actual = normalize(sparql)
    expected = dedent_and_normalize("""
SELECT ?community ?title
WHERE {
{
SELECT DISTINCT ?community
WHERE {
( ?community ?score ) <http://jena.apache.org/text#query> ( <http://purl.org/dc/terms/title> "abc" ) .
?community <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://purl.org/ontology/modalia#Community> .
}
ORDER BY DESC ( ?score )
LIMIT 20
OFFSET 5
}
?community <http://purl.org/dc/terms/title> ?title .
}
""")
    assert actual == expected
def test_prepare_query_for_count_in_community_search():
    # The count query must aggregate distinct communities over the same
    # text-search patterns as the paged search.
    sparql = prepare_query_for_count_in_community_search(query="abc")

    actual = normalize(sparql)
    expected = dedent_and_normalize("""
SELECT (COUNT( DISTINCT ?community ) as ?count)
WHERE {
( ?community ?score ) <http://jena.apache.org/text#query> ( <http://purl.org/dc/terms/title> "abc" ) .
?community <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://purl.org/ontology/modalia#Community> .
}
""")
    assert actual == expected
def test_get_communities_suggestions(triplestore):
    # Third page (offset 10, five per page) of the communities matching "NFDI".
    search_request = CurationSuggestSearchRequest(q="NFDI", limit=5, offset=10)

    page = get_communities_suggestions(search_request)

    assert page.count == 20
    assert page.limit == 5
    assert page.offset == 10

    assert [item.value for item in page.results] == [
        "https://id.dalia.education/community/7783f91b-2496-4c1b-97ef-9db578d237ca",
        "https://id.dalia.education/community/7c45de27-5996-40b5-8e0e-24afdfe148e5",
        "https://id.dalia.education/community/7ecf1a3e-e377-4f5c-ac70-78d498951843",
        "https://id.dalia.education/community/8cd01866-7560-4701-ba4a-da3c939b9061",
        "https://id.dalia.education/community/aac7b1be-cf00-4fdc-a26a-e8e0e1410b18",
    ]

    assert [item.label for item in page.results] == [
        "NFDI4Cat",
        "NFDI4Health",
        "BERD@NFDI",
        "Base4NFDI",
        "NFDI4Biodiversity",
    ]
@pytest.mark.parametrize(
    "query, expected_count",
    [
        ("*", 38),
        ("*NFDI*", 20),
        ("*Chem*", 2),
    ],
)
def test_count_results_from_community_search(triplestore, query, expected_count):
    # Counts are taken against the dataset provided by the triplestore fixture.
    actual_count = count_results_from_community_search(query)
    assert actual_count == expected_count
@pytest.mark.parametrize(
    "request_data, expected_response_data",
    [
        # No search text: all communities are counted, first page of two.
        (
            {"limit": 2},
            CurationSuggestPaginatedResult(
                count=38,
                offset=0,
                limit=2,
                results=[
                    CurationSuggestResultItem(
                        label='DAPHNE4NFDI',
                        value='https://id.dalia.education/community/0393d642-340d-4641-8c1f-e9c8b27199bf',
                    ),
                    CurationSuggestResultItem(
                        label='RADAR',
                        value='https://id.dalia.education/community/0957e041-54d9-4f72-812c-013fcc48c2f3',
                    ),
                ],
            ),
        ),
        # Search text combined with paging.
        (
            {"q": "NFDI", "limit": 1, "offset": 10},
            CurationSuggestPaginatedResult(
                count=20,
                offset=10,
                limit=1,
                results=[
                    CurationSuggestResultItem(
                        label='NFDI4Cat',
                        value='https://id.dalia.education/community/7783f91b-2496-4c1b-97ef-9db578d237ca',
                    ),
                ],
            ),
        ),
        # Search text without any match.
        (
            {"q": "abc", "limit": 100},
            CurationSuggestPaginatedResult(
                count=0,
                offset=0,
                limit=100,
                results=[],
            ),
        ),
    ],
)
def test_get_on_CurationSuggestCommunitiesView_returns_200_and_valid_data(
    triplestore, api_client, request_data, expected_response_data
):
    endpoint = reverse("curation_suggest_communities")
    response = api_client.get(endpoint, data=request_data)
    assert response.status_code == status.HTTP_200_OK

    # Parse the response body back into the dataclass to compare it as a whole.
    round_trip = CurationSuggestPaginatedResultSerializer(data=response.data)
    assert round_trip.is_valid()
    assert round_trip.validated_data == expected_response_data
@pytest.mark.parametrize(
    "request_data, expected_error_response",
    [
        # limit below its minimum of 1
        (
            {"limit": 0},
            {
                'limit': [
                    ErrorDetail(string='Ensure this value is greater than or equal to 1.', code='min_value')
                ],
            },
        ),
        # offset below its minimum of 0
        (
            {"offset": -1},
            {
                'offset': [
                    ErrorDetail(string='Ensure this value is greater than or equal to 0.', code='min_value')
                ],
            },
        ),
        # both violations are reported together
        (
            {"limit": 0, "offset": -1},
            {
                'limit': [
                    ErrorDetail(string='Ensure this value is greater than or equal to 1.', code='min_value')
                ],
                'offset': [
                    ErrorDetail(string='Ensure this value is greater than or equal to 0.', code='min_value')
                ],
            },
        ),
    ],
)
def test_get_on_CurationSuggestCommunitiesView_returns_400_for_invalid_request_data(
    api_client, request_data, expected_error_response
):
    endpoint = reverse("curation_suggest_communities")
    response = api_client.get(endpoint, data=request_data)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert response.data == expected_error_response
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment