Skip to content
Snippets Groups Projects
Commit ae18c242 authored by Michael Thies's avatar Michael Thies
Browse files

backends.couchdb: Use WeakValueDictionary to ensure retrieved object identity

parent 4ea6143b
Branches
Tags
1 merge request!50Feature/backend couchdb
Pipeline #354302 failed
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
Todo: Add module docstring Todo: Add module docstring
""" """
import threading import threading
import weakref
from typing import List, Dict, Any, Optional, Iterator, Iterable, Union from typing import List, Dict, Any, Optional, Iterator, Iterable, Union
import re import re
import urllib.parse import urllib.parse
...@@ -232,6 +233,14 @@ class CouchDBObjectStore(model.AbstractObjectStore): ...@@ -232,6 +233,14 @@ class CouchDBObjectStore(model.AbstractObjectStore):
self.url: str = url self.url: str = url
self.database_name: str = database self.database_name: str = database
# A dictionary of weak references to local replications of stored objects. Objects are kept in this cache as
# long as there is any other reference in the Python application to them. We use this to make sure that only one
# local replication of each object is kept in the application and retrieving an object from the store always
# returns the **same** (not only equal) object. Objects are still dropped from the cache as soon as they are no
# longer referenced anywhere else, so the cache does not keep them in memory on its own.
self._object_cache = weakref.WeakValueDictionary()
self._object_cache_lock = threading.Lock()
def check_database(self, create=False): def check_database(self, create=False):
""" """
Check if the database exists and create it if not (and requested to do so) Check if the database exists and create it if not (and requested to do so)
...@@ -294,6 +303,19 @@ class CouchDBObjectStore(model.AbstractObjectStore): ...@@ -294,6 +303,19 @@ class CouchDBObjectStore(model.AbstractObjectStore):
self.generate_source(obj) # Generate the source parameter of this object self.generate_source(obj) # Generate the source parameter of this object
set_couchdb_revision("{}/{}/{}".format(self.url, self.database_name, urllib.parse.quote(identifier, safe='')), set_couchdb_revision("{}/{}/{}".format(self.url, self.database_name, urllib.parse.quote(identifier, safe='')),
data["_rev"]) data["_rev"])
# If we still have a local replication of that object (because it is still referenced somewhere else), update
# that replication and return it.
with self._object_cache_lock:
if obj.identification in self._object_cache:
old_obj = self._object_cache[obj.identification]
# Reuse the cached object only if its source still matches the source generated for this CouchDB backend.
# Otherwise, the object seems to belong to another backend now, so we return a fresh copy.
if old_obj.source == obj.source:
old_obj.update_from(obj)
return old_obj
self._object_cache[obj.identification] = obj
return obj return obj
def add(self, x: model.Identifiable) -> None: def add(self, x: model.Identifiable) -> None:
...@@ -322,6 +344,8 @@ class CouchDBObjectStore(model.AbstractObjectStore): ...@@ -322,6 +344,8 @@ class CouchDBObjectStore(model.AbstractObjectStore):
raise KeyError("Identifiable with id {} already exists in CouchDB database".format(x.identification))\ raise KeyError("Identifiable with id {} already exists in CouchDB database".format(x.identification))\
from e from e
raise raise
with self._object_cache_lock:
self._object_cache[x.identification] = x
self.generate_source(x) # Set the source of the object self.generate_source(x) # Set the source of the object
def discard(self, x: model.Identifiable, safe_delete=False) -> None: def discard(self, x: model.Identifiable, safe_delete=False) -> None:
...@@ -380,6 +404,8 @@ class CouchDBObjectStore(model.AbstractObjectStore): ...@@ -380,6 +404,8 @@ class CouchDBObjectStore(model.AbstractObjectStore):
delete_couchdb_revision("{}/{}/{}".format(self.url, delete_couchdb_revision("{}/{}/{}".format(self.url,
self.database_name, self.database_name,
self._transform_id(x.identification))) self._transform_id(x.identification)))
with self._object_cache_lock:
del self._object_cache[x.identification]
x.source = "" x.source = ""
def __contains__(self, x: object) -> bool: def __contains__(self, x: object) -> bool:
......
...@@ -109,6 +109,12 @@ class CouchDBBackendTest(unittest.TestCase): ...@@ -109,6 +109,12 @@ class CouchDBBackendTest(unittest.TestCase):
model.Identifier(id_='https://acplt.org/Test_Submodel', id_type=model.IdentifierType.IRI)) model.Identifier(id_='https://acplt.org/Test_Submodel', id_type=model.IdentifierType.IRI))
self.assertIs(test_object_retrieved, test_object_retrieved_again) self.assertIs(test_object_retrieved, test_object_retrieved_again)
# However, a changed source should invalidate the cached object, so we should get a new copy
test_object_retrieved.source = "couchdb://example.com/example/IRI-https%3A%2F%2Facplt.org%2FTest_Submodel"
test_object_retrieved_third = self.object_store.get_identifiable(
model.Identifier(id_='https://acplt.org/Test_Submodel', id_type=model.IdentifierType.IRI))
self.assertIsNot(test_object_retrieved, test_object_retrieved_third)
def test_example_submodel_storing(self) -> None: def test_example_submodel_storing(self) -> None:
example_submodel = create_example_submodel() example_submodel = create_example_submodel()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment