Skip to content

Commit

Permalink
Merge pull request #10 from HelmholtzAI-Consultants-Munich/feats/mana…
Browse files Browse the repository at this point in the history
…ge_disk_files

Feats/manage disk files
  • Loading branch information
IsraMekki0 authored Apr 29, 2024
2 parents 2530999 + 0c66c25 commit dca5556
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 7 deletions.
31 changes: 31 additions & 0 deletions effidict/_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import time
from abc import abstractmethod
from collections import OrderedDict

Expand All @@ -15,6 +17,9 @@ class EffiDictBase:
def __init__(self, max_in_memory=100, storage_path="cache"):
    """
    Set up the in-memory store and derive a per-instance storage path.

    :param max_in_memory: Value stored as ``self.max_in_memory``.
    :type max_in_memory: int
    :param storage_path: Base path; a unique suffix is appended to it and the
        result is stored as ``self.storage_path``.
    :type storage_path: str or os.PathLike
    """
    self.max_in_memory = max_in_memory
    # Add a unique identifier to the storage path to avoid conflicts.
    # id(self) is only unique inside one process and the timestamp only has
    # second resolution, so the PID is included to keep instances created at
    # the same moment in different processes apart. os.fspath lets callers
    # pass a pathlib.Path as well as a plain string.
    self.storage_path = (
        f"{os.fspath(storage_path)}_{int(time.time())}_{os.getpid()}_{id(self)}"
    )

    self.memory = OrderedDict()

def __iter__(self):
Expand Down Expand Up @@ -93,3 +98,29 @@ def _deserialize(self, key):
@abstractmethod
def load_from_dict(self, dictionary):
    """
    Abstract hook with no behavior of its own; it returns ``None``.

    :param dictionary: Input handed to the concrete implementation.
    """
    return None

@abstractmethod
def destroy(self):
    """
    Abstract hook with no behavior of its own; it returns ``None``.

    ``__del__`` calls this method when an instance is finalized.
    """
    return None

def __del__(self):
    """
    Finalizer: release the cache's resources by calling ``destroy()``.

    Errors raised by ``destroy()`` are suppressed here. A finalizer can run on
    an instance whose ``__init__`` failed part-way, after ``destroy()`` was
    already called explicitly, or during interpreter shutdown; in each case
    the cleanup may legitimately fail and there is no caller to report to.
    """
    try:
        self.destroy()
    except Exception:
        # Deliberate best effort: Python would only print an
        # "Exception ignored in __del__" warning for anything raised here.
        pass

def pop(self, key, default=None):
    """
    Remove an item from the cache and return its value.

    The item is read through ``__getitem__`` and then removed through
    ``__delitem__``. A missing key does not raise: ``default`` is returned
    instead, which is ``None`` unless the caller passes another value.

    :param key: The key of the item to remove.
    :param default: The value to return if the key is not found.
    :return: The value of the removed item if the key is found, otherwise
        ``default``.
    :raises KeyError: Only if the lookup succeeds but ``__delitem__`` then
        fails to find the key.
    """
    try:
        value = self[key]
    except KeyError:
        # Only a failed lookup means "not present"; keep the try body minimal
        # so a KeyError from the deletion below is not mistaken for it.
        return default

    del self[key]
    return value
32 changes: 28 additions & 4 deletions effidict/lru_dict.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import inspect
import json
import os
import pickle
import shutil
import sqlite3
import warnings

from ._base import EffiDictBase

Expand Down Expand Up @@ -34,8 +37,7 @@ def __init__(self, max_in_memory=100, storage_path="cache"):
"""
super().__init__(max_in_memory, storage_path)
if not os.path.exists(storage_path):
os.makedirs(storage_path)
os.makedirs(self.storage_path)

def _serialize(self, key, value):
"""
Expand Down Expand Up @@ -141,6 +143,20 @@ def load_from_dict(self, dictionary):
for key, value in dictionary.items():
self[key] = value

def destroy(self):
    """
    Destroy the cache and remove all serialized files on disk.

    The directory at ``self.storage_path`` is removed unless a frame named
    ``_process_worker`` is on the call stack, and the ``memory`` attribute is
    deleted. Calling this more than once is safe: a directory that is already
    gone and an already-deleted ``memory`` attribute are both tolerated.
    """
    # Problem with joblib: don't delete the storage_path if called by joblib.
    # context=0 skips loading source lines for every frame; only the function
    # names are needed here.
    called_by_joblib = any(
        record.function == "_process_worker" for record in inspect.stack(0)
    )

    if not called_by_joblib:
        try:
            shutil.rmtree(self.storage_path)
        except FileNotFoundError:
            # Nothing left to remove (e.g. destroy() already ran once).
            pass

    if hasattr(self, "memory"):
        del self.memory


class LRUDBDict(EffiDictBase):
"""
Expand All @@ -156,9 +172,9 @@ class LRUDBDict(EffiDictBase):
:type storage_path: str
"""

def __init__(self, max_in_memory=100, storage_path="cache.db"):
def __init__(self, max_in_memory=100, storage_path="cache"):
super().__init__(max_in_memory, storage_path)
self.conn = sqlite3.connect(storage_path)
self.conn = sqlite3.connect(self.storage_path + ".db")
self.cursor = self.conn.cursor()
self.cursor.execute(
"CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT)"
Expand Down Expand Up @@ -265,3 +281,11 @@ def load_from_dict(self, dictionary):
"REPLACE INTO data (key, value) VALUES (?, ?)",
items_to_insert,
)

def destroy(self):
    """
    Destroy the cache and remove the SQLite database file.

    Deletes the ``memory`` attribute, closes ``self.conn`` and removes the
    file ``self.storage_path + ".db"``. Calling this more than once is safe:
    missing attributes and an already-removed file are tolerated.
    """
    if hasattr(self, "memory"):
        del self.memory

    # conn is absent if initialization failed before the connection was made.
    conn = getattr(self, "conn", None)
    if conn is not None:
        # Close the connection before removing the file it points at.
        conn.close()

    try:
        os.remove(self.storage_path + ".db")
    except FileNotFoundError:
        # Already removed (e.g. destroy() ran earlier, or a test cleaned up).
        pass
3 changes: 0 additions & 3 deletions tests/test_lru_dict.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import shutil

import pytest

Expand All @@ -11,7 +10,6 @@ def lru_dict(tmp_path):
"""Fixture to create an LRUDict instance and clean up after tests."""
cache = LRUDict(max_in_memory=2, storage_path=str(tmp_path / "lrudict_cache"))
yield cache
shutil.rmtree(str(tmp_path / "lrudict_cache"), ignore_errors=True)


def test_lrudict_set_and_get_item(lru_dict):
Expand Down Expand Up @@ -78,7 +76,6 @@ def lrudb_dict(tmp_path):
cache = LRUDBDict(max_in_memory=2, storage_path=db_path)
yield cache
cache.conn.close()
os.remove(db_path)


def test_lrudbdict_initialization(lrudb_dict):
Expand Down

0 comments on commit dca5556

Please sign in to comment.