diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..81b3e0aefe0560c1efa3d9d6e5e9198145320993 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,19 @@ +{ + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.formatOnSave": true, + "editor.rulers": [ + 88 + ], + "editor.renderWhitespace": "trailing", + "editor.codeActionsOnSave": { + "source.organizeImports.ruff": "explicit" + } + }, + "python.testing.pytestArgs": [ + "tests", + "-s" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/project/recommendation/__init__.py b/project/recommendation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/project/recommendation/admin.py b/project/recommendation/admin.py new file mode 100644 index 0000000000000000000000000000000000000000..8c38f3f3dad51e4585f3984282c2a4bec5349c1e --- /dev/null +++ b/project/recommendation/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. 
# --- project/recommendation/api_models/api_models.py ---


@dataclass
class SuggestedContents:
    """API model carrying the learning resources suggested for one material."""

    # Metadata items of the suggested materials; the type allows None.
    results: Optional[List[Item]]


# --- project/recommendation/apps.py ---


class DaliaConfig(AppConfig):
    """Django application configuration for the ``project.recommendation`` app."""

    # NOTE(review): the class name looks carried over from the dalia app. It is
    # left untouched because renaming it changes the dotted path of the config.
    name = "project.recommendation"
    default_auto_field = "django.db.models.BigAutoField"
# --- project/recommendation/materials/suggested_content.py ---

# Default cap on how many materials are suggested for one learning resource.
_DEFAULT_NUMBER_OF_MATERIALS = 5


def _learning_resource_uri(uuid: UUID) -> str:
    """Return the full URI of the learning resource identified by *uuid*."""
    return f"{_LEARNING_RESOURCE_BASE_URI}{uuid}"


def _first_column(query) -> list:
    """Run *query* on the DALIA dataset and return the first binding of each row."""
    return [row[0] for row in query_dalia_dataset(query)]


def _get_objects_of(uuid: UUID, predicate) -> list:
    """Return every object of the triple ``<resource> predicate ?material``."""
    var_material = Variable("material")
    query = (
        QueryBuilder()
        .SELECT(var_material)
        .WHERE((URIRef(_learning_resource_uri(uuid)), predicate, var_material))
        .build()
    )
    return _first_column(query)


def _get_shared_keywords(uuid: UUID):
    """Return resources sharing at least two ``schema:keywords`` with *uuid*.

    Rows are ordered by the number of shared keywords, highest first, so the
    closest matches are the ones that survive the cap applied by the caller.
    """
    resource = _learning_resource_uri(uuid)
    query = f"""
    PREFIX schema: <https://schema.org/>
    SELECT ?sub (COUNT(?sharedKeyword) AS ?keywordCount) WHERE {{
        <{resource}> schema:keywords ?sharedKeyword .
        ?sub schema:keywords ?sharedKeyword .
        FILTER(?sub != <{resource}>)
    }}
    GROUP BY ?sub
    HAVING(?keywordCount >= 2)
    ORDER BY DESC(?keywordCount)
    """
    return _first_column(query)


def _get_is_part_of(uuid: UUID):
    """Return the objects of the ``isPartOf`` predicate (SCHEMA namespace)."""
    return _get_objects_of(uuid, URIRef(SCHEMA.NS + "isPartOf"))


def _get_is_related_to(uuid: UUID):
    """Return the objects of the ``isRelatedTo`` predicate (MoDalia namespace)."""
    return _get_objects_of(uuid, URIRef(MoDalia.NS + "isRelatedTo"))


def _get_is_based_on(uuid: UUID):
    """Return the objects of the ``isBasedOn`` predicate (MoDalia namespace)."""
    return _get_objects_of(uuid, URIRef(MoDalia.NS + "isBasedOn"))


def _get_authors(uuid: UUID):
    """Return up to 10 other resources that share an author ORCID with *uuid*.

    Only authors typed as ``schema:Person`` on the source resource are matched.
    """
    resource = _learning_resource_uri(uuid)
    query = f"""
    PREFIX arq: <http://jena.apache.org/ARQ/list#>
    PREFIX schema: <https://schema.org/>
    PREFIX m4i: <http://w3id.org/nfdi4ing/metadata4ing#>

    SELECT DISTINCT ?lr WHERE {{
        <{resource}> schema:author ?list.
        ?list arq:member ?member.
        ?member a ?type.
        ?member m4i:orcidId ?id.
        ?lr schema:author ?newlist.
        ?newlist arq:member ?newmember.
        ?newmember m4i:orcidId ?id.
        FILTER(?lr != <{resource}>)
        FILTER(?type = schema:Person)
    }}
    LIMIT 10"""
    return _first_column(query)


def _get_same_discipline(uuid: UUID):
    """Return every resource that has a ``hasDiscipline`` value in common with *uuid*.

    The pattern also matches the source resource itself; callers are expected
    to drop it (``get_suggested_contents_id`` does).
    """
    var_material = Variable("material")
    var_discipline = Variable("discipline")
    has_discipline = URIRef(fabio.NS + "hasDiscipline")
    query = (
        QueryBuilder()
        .SELECT(var_material)
        .WHERE(
            (URIRef(_learning_resource_uri(uuid)), has_discipline, var_discipline),
            (var_material, has_discipline, var_discipline),
        )
        .build()
    )
    return _first_column(query)


# Recommendation strategies, strongest signal first. Later strategies are only
# queried while the requested number of suggestions has not been reached.
RANKING = (
    _get_is_part_of,
    _get_is_based_on,
    _get_is_related_to,
    _get_authors,
    _get_shared_keywords,
    _get_same_discipline,
)


def _ranked_suggestions(uuid: UUID, limit: int) -> list:
    """Collect at most *limit* distinct suggestions in ``RANKING`` order."""
    if limit <= 0:
        return []

    prefix = str(_LEARNING_RESOURCE_BASE_URI)
    own_uri = f"{prefix}{uuid}"
    ranked = []
    seen = set()
    for strategy in RANKING:
        for candidate in strategy(uuid):
            uri = str(candidate)
            # Never suggest the material itself, and keep only learning
            # resources that live under the DALIA base URI.
            if uri == own_uri or not uri.startswith(prefix):
                continue
            if candidate in seen:
                continue
            seen.add(candidate)
            ranked.append(candidate)
            if len(ranked) >= limit:
                return ranked
    return ranked


def get_suggested_contents_id(
    uuid: UUID, number_of_materials: int = _DEFAULT_NUMBER_OF_MATERIALS
) -> Set:
    """Return the URIs of the materials suggested for the resource *uuid*.

    Args:
        uuid: Identifier of the learning resource to find suggestions for.
        number_of_materials: Maximum number of suggestions; values <= 0 give
            an empty set.

    Returns:
        A set of at most *number_of_materials* learning-resource URIs. The
        resource itself is never part of the result. A set carries no order;
        the ranking only decides which candidates make it in.
    """
    return set(_ranked_suggestions(uuid, number_of_materials))


def get_suggested_contents(uuid: UUID) -> "SuggestedContents":
    """Return the metadata of the materials suggested for the resource *uuid*.

    The URIs are handed to the metadata lookup in ranking order.
    NOTE(review): whether that lookup preserves the order is not visible here.
    """
    ids = _ranked_suggestions(uuid, _DEFAULT_NUMBER_OF_MATERIALS)
    return SuggestedContents(get_metadata_for_learning_resources(ids))
# --- project/recommendation/serializers.py ---


class SuggestedContentSerializer(DataclassSerializer):
    """Serializer for the ``SuggestedContents`` API dataclass."""

    class Meta:
        dataclass = api_models.SuggestedContents


# --- project/recommendation/urls.py ---

# URL configuration for app 'recommendation'.
urlpatterns = [
    path(
        "v1/item/<uuid:material_id>/recommendations",
        views.MaterialSuggestionsView.as_view(),
        name="material_recommendations",
    ),
]


# --- project/recommendation/views.py ---


# endpoint api/dalia/recommendation/v1/item/<uuid:material_id>/recommendations
class MaterialSuggestionsView(APIView):
    """Read-only endpoint returning the materials suggested for one item."""

    def get(self, request: Request, material_id: UUID):
        """Return the suggestions for *material_id*.

        Responds with 404 and a message when there is nothing to suggest,
        otherwise with the serialized ``SuggestedContents``.
        """
        suggested_contents = get_suggested_contents(material_id)
        # `results` is Optional, so None must be handled like an empty list;
        # calling len() on it would raise TypeError.
        if not suggested_contents.results:
            return Response(
                {"messages": "No suggestions found"},
                status=status.HTTP_404_NOT_FOUND,
            )
        result = serializers.SuggestedContentSerializer(suggested_contents)
        return Response(result.data)
# --- tests/project/recommendation/test_query.py ---

# Learning resource used by all tests below; they query the DALIA dataset
# for it and print what comes back.
_MATERIAL_UUID = UUID("39ea23c6-a591-442f-afde-c3262e20f1e4")


def test_get_is_part_of():
    """The isPartOf lookup returns a list of printable URIs."""
    print("get is part of")
    results = _get_is_part_of(_MATERIAL_UUID)
    assert isinstance(results, list)
    print("\n".join(results))


def test_get_is_related_to():
    """The isRelatedTo lookup returns a list of printable URIs."""
    print("get is related to")
    results = _get_is_related_to(_MATERIAL_UUID)
    assert isinstance(results, list)
    print("\n".join(results))


def test_get_is_based_on():
    """The isBasedOn lookup returns a list of printable URIs."""
    print("get is based on")
    results = _get_is_based_on(_MATERIAL_UUID)
    assert isinstance(results, list)
    print("\n".join(results))


def test_get_shared_keywords():
    """The shared-keyword lookup returns a list of printable URIs."""
    print("get shared keywords")
    results = _get_shared_keywords(_MATERIAL_UUID)
    assert isinstance(results, list)
    print("\n".join(results))


def test_get_authors():
    """The shared-author lookup returns a list of printable URIs."""
    print("get authors")
    results = _get_authors(_MATERIAL_UUID)
    assert isinstance(results, list)
    print("\n".join(results))


def test_get_same_discipline():
    """The same-discipline lookup returns a list of printable URIs."""
    print("get same discipline")
    results = _get_same_discipline(_MATERIAL_UUID)
    assert isinstance(results, list)
    print("\n".join(results))


def test_get_suggested_contents():
    """The suggestions are exposed through the ``results`` attribute."""
    print("get suggested contents")
    suggested = get_suggested_contents(_MATERIAL_UUID)
    # SuggestedContents is a plain dataclass and not iterable itself; the
    # items live in `results`, which may be None.
    for result in suggested.results or []:
        print(result)


# --- tests/project/recommendation/test_serialiser.py ---


def test_suggested_content_serializer():
    """Serializing the suggestions yields a mapping with a ``results`` entry."""
    print("get suggested contents")
    results = get_suggested_contents(_MATERIAL_UUID)
    serializer = SuggestedContentSerializer(results)
    data = serializer.data
    assert "results" in data
    print(data)