diff --git a/src/providers/views.py b/src/providers/views.py index f9199b7fef3bd92553b805ebc11dc674f6ce958e..de6a933b1afc766595d25d447556a3ea575a92ce 100644 --- a/src/providers/views.py +++ b/src/providers/views.py @@ -386,22 +386,29 @@ def replace_provider_id(statement, providers = []): class GetProviderData(APIView): """ - Endpoint that allows an analytics engine to obtain provider statements from the lrs. + Endpoint that allows an analytics engine to obtain provider statements from the LRS. """ def post(self, request): + start_time = time.time() + print("Starting GetProviderData.post") + token = request.headers.get("Authorization").split("Basic ")[1] + print(f"Token extracted: {token}") try: req_serializer = GetProviderDataRequestSerializer(data=request.data) req_serializer.is_valid(raise_exception=True) + print("Request serializer validated") analytics_token = AnalyticsToken.objects.get(key=token) + print("Analytics token retrieved") if ( analytics_token.expires is not None and analytics_token.expires <= timezone.now() ): + print("Token has expired") return Response( {"message": "Token has expired."}, status=status.HTTP_401_UNAUTHORIZED, @@ -412,25 +419,23 @@ class GetProviderData(APIView): collection = lrs_db["statements"] - active_verbs = list(map(lambda x : x["verb"], list(AnalyticsTokenVerb.objects.filter(analytics_token_id = analytics_token).values()))) - if settings.SHOW_XAPI_STATEMENTS: - print(active_verbs) - + db_fetch_start = time.time() + active_verbs = list(map(lambda x: x["verb"], list(AnalyticsTokenVerb.objects.filter(analytics_token_id=analytics_token).values()))) + print(f"Active verbs retrieved in {time.time() - db_fetch_start:.6f} seconds: {active_verbs}") + providers = [] - # check for anonymized collection - for analytics_token_verb in AnalyticsTokenVerb.objects.filter(analytics_token_id = analytics_token): + for analytics_token_verb in AnalyticsTokenVerb.objects.filter(analytics_token_id=analytics_token): if analytics_token_verb.provider not in providers: providers.append(analytics_token_verb.provider) anon_verbs = [] for provider in providers: - # get verbs that can be collected anonymously and their minimum count - # and get those, whose minimum count is reached try: latest_schema = ProviderSchema.objects.get( provider=provider, superseded_by__isnull=True ) except ObjectDoesNotExist: + print(f"No consent provider schema found for provider: {provider.name}") return JsonResponse( { "message": "No consent provider schema found.", @@ -439,50 +444,43 @@ class GetProviderData(APIView): safe=False, status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) - - + for verb in [verb for verblist in [group["verbs"] for group in latest_schema.groups] for verb in verblist]: if verb["id"] in active_verbs and verb.get("allowAnonymizedCollection", False): min_count = verb.get("allowAnonymizedCollectionMinCount", settings.ANONYMIZATION_DEFAULT_MINIMUM_COUNT) + query_start = time.time() current_count = collection.distinct("actor.mbox", { "$and": [ {"verb.id": {"$eq": verb["id"]}}, {"actor.mbox": {"$exists": True}}, {"actor.mbox": {"$regex": "^" + settings.ANONYMIZATION_HASH_PREFIX}} ] - }).length + }).__len__() + print(f"Query for verb {verb['id']} executed in {time.time() - query_start:.6f} seconds") if current_count >= min_count: anon_verbs.append(verb) + print(f"Anonymous verbs determined: {anon_verbs}") - # selects for anonymized statements of which there are enough different actors - # or explicitly non-anonymized statements / system statements - anon_query = {"$or": [ - # current statement is anonymized and - # enough anonymized statements for this verb exist + query_start_time = time.time() + query = ( + { + "$and": [ + {"actor.tan": {"$exists": False}}, + {"_id": {"$gt": ObjectId(last_object_id)}}, + {"verb.id": {"$in": active_verbs}}, + {"$or": [ {"$and": [ {"actor.mbox": {"$exists": True}}, {"actor.mbox": {"$regex": "^" + settings.ANONYMIZATION_HASH_PREFIX}}, {"verb.id": {"$in": anon_verbs}}, ]}, - # current statement is not anonymized - {"$or": [ - {"actor.mbox": {"$exists": False}}, - {"$and": [ - {"actor.mbox": {"$exists": True}}, - {"actor.mbox": {"$regex": "^mailto"}}, - ]}, - # also query for system statements added by relevant providers - get_system_statement_query(providers) - ]} + {"actor.mbox": {"$exists": False}}, + {"$and": [ + {"actor.mbox": {"$exists": True}}, + {"actor.mbox": {"$regex": "^mailto"}}, + ]}, + get_system_statement_query(providers) ]} - - query = ( - { - "$and": [ - {"actor.tan": {"$exists": False}}, # important! flagged statements filtered out here - {"_id": {"$gt": ObjectId(last_object_id)}}, - {"verb.id": {"$in": active_verbs}}, - anon_query # for anonymized statements ] } if last_object_id @@ -490,13 +488,27 @@ class GetProviderData(APIView): "$and": [ {"actor.tan": {"$exists": False}}, {"verb.id": {"$in": active_verbs}}, - anon_query # for anonymized statements + {"$or": [ + {"$and": [ + {"actor.mbox": {"$exists": True}}, + {"actor.mbox": {"$regex": "^" + settings.ANONYMIZATION_HASH_PREFIX}}, + {"verb.id": {"$in": anon_verbs}}, + ]}, + {"actor.mbox": {"$exists": False}}, + {"$and": [ + {"actor.mbox": {"$exists": True}}, + {"actor.mbox": {"$regex": "^mailto"}}, + ]}, + get_system_statement_query(providers) + ]} ] } ) - if settings.SHOW_XAPI_STATEMENTS: - print(query) + print(f"Query constructed in {time.time() - query_start_time:.6f} seconds") + + execution_start = time.time() cursor = collection.find(query).limit(page_size) + print(f"Query executed in {time.time() - execution_start:.6f} seconds") data = { "verbs": list(set(active_verbs)), "statements": list(map(replace_provider_id, list(cursor))), @@ -506,8 +518,10 @@ class GetProviderData(APIView): serializer = ProviderDataSerializer(data=data) if serializer.is_valid(): + print(f"Total execution time: {time.time() - start_time:.6f} seconds") return Response(serializer.data, status=status.HTTP_200_OK) + print("Failed to serialize response data") return Response( { "message": "Failed to serialize response data.", @@ -517,7 +531,7 @@ class GetProviderData(APIView): ) except ObjectDoesNotExist as c: - print(c) + print(f"Exception occurred: {c}") return Response( {"message": "Invalid analytics token " + token}, status=status.HTTP_401_UNAUTHORIZED,