Skip to content
Snippets Groups Projects
Commit 38dfe3ca authored by Benjamin Ledel
Browse files

Update file views.py

parent 03fe90c1
Branches
Tags
No related merge requests found
Pipeline #1610769 passed
Pipeline: FIRST_START

#1610792

    ......@@ -386,22 +386,29 @@ def replace_provider_id(statement, providers = []):
    class GetProviderData(APIView):
    """
    Endpoint that allows an analytics engine to obtain provider statements from the lrs.
    Endpoint that allows an analytics engine to obtain provider statements from the LRS.
    """
    def post(self, request):
    start_time = time.time()
    print("Starting GetProviderData.post")
    token = request.headers.get("Authorization").split("Basic ")[1]
    print(f"Token extracted: {token}")
    try:
    req_serializer = GetProviderDataRequestSerializer(data=request.data)
    req_serializer.is_valid(raise_exception=True)
    print("Request serializer validated")
    analytics_token = AnalyticsToken.objects.get(key=token)
    print("Analytics token retrieved")
    if (
    analytics_token.expires is not None
    and analytics_token.expires <= timezone.now()
    ):
    print("Token has expired")
    return Response(
    {"message": "Token has expired."},
    status=status.HTTP_401_UNAUTHORIZED,
    ......@@ -412,25 +419,23 @@ class GetProviderData(APIView):
    collection = lrs_db["statements"]
    active_verbs = list(map(lambda x : x["verb"], list(AnalyticsTokenVerb.objects.filter(analytics_token_id = analytics_token).values())))
    if settings.SHOW_XAPI_STATEMENTS:
    print(active_verbs)
    db_fetch_start = time.time()
    active_verbs = list(map(lambda x: x["verb"], list(AnalyticsTokenVerb.objects.filter(analytics_token_id=analytics_token).values())))
    print(f"Active verbs retrieved in {time.time() - db_fetch_start:.6f} seconds: {active_verbs}")
    providers = []
    # check for anonymized collection
    for analytics_token_verb in AnalyticsTokenVerb.objects.filter(analytics_token_id = analytics_token):
    for analytics_token_verb in AnalyticsTokenVerb.objects.filter(analytics_token_id=analytics_token):
    if analytics_token_verb.provider not in providers:
    providers.append(analytics_token_verb.provider)
    anon_verbs = []
    for provider in providers:
    # get verbs that can be collected anonymously and their minimum count
    # and get those, whose minimum count is reached
    try:
    latest_schema = ProviderSchema.objects.get(
    provider=provider, superseded_by__isnull=True
    )
    except ObjectDoesNotExist:
    print(f"No consent provider schema found for provider: {provider.name}")
    return JsonResponse(
    {
    "message": "No consent provider schema found.",
    ......@@ -439,50 +444,43 @@ class GetProviderData(APIView):
    safe=False,
    status=status.HTTP_500_INTERNAL_SERVER_ERROR,
    )
    for verb in [verb for verblist in [group["verbs"] for group in latest_schema.groups] for verb in verblist]:
    if verb["id"] in active_verbs and verb.get("allowAnonymizedCollection", False):
    min_count = verb.get("allowAnonymizedCollectionMinCount", settings.ANONYMIZATION_DEFAULT_MINIMUM_COUNT)
    query_start = time.time()
    current_count = collection.distinct("actor.mbox", {
    "$and": [
    {"verb.id": {"$eq": verb["id"]}},
    {"actor.mbox": {"$exists": True}},
    {"actor.mbox": {"$regex": "^" + settings.ANONYMIZATION_HASH_PREFIX}}
    ]
    }).length
    }).__len__()
    print(f"Query for verb {verb['id']} executed in {time.time() - query_start:.6f} seconds")
    if current_count >= min_count:
    anon_verbs.append(verb)
    print(f"Anonymous verbs determined: {anon_verbs}")
    # selects for anonymized statements of which there are enough different actors
    # or explicitly non-anonymized statements / system statements
    anon_query = {"$or": [
    # current statement is anonymized and
    # enough anonymized statements for this verb exist
    query_start_time = time.time()
    query = (
    {
    "$and": [
    {"actor.tan": {"$exists": False}},
    {"_id": {"$gt": ObjectId(last_object_id)}},
    {"verb.id": {"$in": active_verbs}},
    {"$or": [
    {"$and": [
    {"actor.mbox": {"$exists": True}},
    {"actor.mbox": {"$regex": "^" + settings.ANONYMIZATION_HASH_PREFIX}},
    {"verb.id": {"$in": anon_verbs}},
    ]},
    # current statement is not anonymized
    {"$or": [
    {"actor.mbox": {"$exists": False}},
    {"$and": [
    {"actor.mbox": {"$exists": True}},
    {"actor.mbox": {"$regex": "^mailto"}},
    ]},
    # also query for system statements added by relevant providers
    get_system_statement_query(providers)
    ]}
    {"actor.mbox": {"$exists": False}},
    {"$and": [
    {"actor.mbox": {"$exists": True}},
    {"actor.mbox": {"$regex": "^mailto"}},
    ]},
    get_system_statement_query(providers)
    ]}
    query = (
    {
    "$and": [
    {"actor.tan": {"$exists": False}}, # important! flagged statements filtered out here
    {"_id": {"$gt": ObjectId(last_object_id)}},
    {"verb.id": {"$in": active_verbs}},
    anon_query # for anonymized statements
    ]
    }
    if last_object_id
    ......@@ -490,13 +488,27 @@ class GetProviderData(APIView):
    "$and": [
    {"actor.tan": {"$exists": False}},
    {"verb.id": {"$in": active_verbs}},
    anon_query # for anonymized statements
    {"$or": [
    {"$and": [
    {"actor.mbox": {"$exists": True}},
    {"actor.mbox": {"$regex": "^" + settings.ANONYMIZATION_HASH_PREFIX}},
    {"verb.id": {"$in": anon_verbs}},
    ]},
    {"actor.mbox": {"$exists": False}},
    {"$and": [
    {"actor.mbox": {"$exists": True}},
    {"actor.mbox": {"$regex": "^mailto"}},
    ]},
    get_system_statement_query(providers)
    ]}
    ]
    }
    )
    if settings.SHOW_XAPI_STATEMENTS:
    print(query)
    print(f"Query constructed in {time.time() - query_start_time:.6f} seconds")
    execution_start = time.time()
    cursor = collection.find(query).limit(page_size)
    print(f"Query executed in {time.time() - execution_start:.6f} seconds")
    data = {
    "verbs": list(set(active_verbs)),
    "statements": list(map(replace_provider_id, list(cursor))),
    ......@@ -506,8 +518,10 @@ class GetProviderData(APIView):
    serializer = ProviderDataSerializer(data=data)
    if serializer.is_valid():
    print(f"Total execution time: {time.time() - start_time:.6f} seconds")
    return Response(serializer.data, status=status.HTTP_200_OK)
    print("Failed to serialize response data")
    return Response(
    {
    "message": "Failed to serialize response data.",
    ......@@ -517,7 +531,7 @@ class GetProviderData(APIView):
    )
    except ObjectDoesNotExist as c:
    print(c)
    print(f"Exception occurred: {c}")
    return Response(
    {"message": "Invalid analytics token " + token},
    status=status.HTTP_401_UNAUTHORIZED,
    ......
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Please register or sign in to comment