diff --git a/blackmesa/records/adjuncts.py b/blackmesa/records/adjuncts.py new file mode 100644 index 0000000..06e3ba3 --- /dev/null +++ b/blackmesa/records/adjuncts.py @@ -0,0 +1,63 @@ +class BaseAdjunct: + """ + Any Adjunct data which does not translate into SQL, but rather adds data programmatically. + + Basic smart class for records() call. + """ + skip = False # if skip is true, this adjunct will not be actually processed. + resolves_field = True # if resolves_field is true, this adjunct will be called for a single field with resolve() + post_processing = False # if post_processing is true, this adjunct will in the end be called with dbdata, and be able to manipulate the whole dictionary. + + def resolve(self, model, dbdata): + """ + resolve returns the field value for one entry. + """ + raise NotImplementedError + + def post_process(self, model, dbdata): + """ if you have post_processing on True, this needs to be implemented. + has to return either a new dictionary to use in initialization of an object, or None. + """ + raise NotImplementedError + + def values_field(self): + """ + return a field for the values operator. + + if you return a string, it will be added to values as args. + if you return a tuple, it will be added to values as kwargs. + """ + return None + +class Adjunct(BaseAdjunct): + """ value function that adds data, without SQL handling. """ + def __init__(self, value=None): + self.value = value + + def resolve(self, model, dbdata): + return self.value + +class Lambda(Adjunct): + """ adjunct value that returns a field value with a callback. """ + def __init__(self, callback): + self.callback = callback if callable(callback) else None + + def resolve(self, model, dbdata): + # at this point i could check if callback needs 0-2 arguments and decide the call. 
+ if self.callback: + return self.callback(dbdata) + +class Skip(BaseAdjunct): + """ Skips this key from being retrieved from the database or used in the dataclass instantiation """ + skip = True + resolves_field = False + +class Callback(BaseAdjunct): + """ calls a callback which can modify the whole initialization dictionary. """ + resolves_field = False + post_processing = True + def __init__(self, callback): + self.callback = callback + def post_process(self, model, dbdata): + if self.callback: + return self.callback(dbdata) diff --git a/blackmesa/records/handlers.py b/blackmesa/records/handlers.py new file mode 100644 index 0000000..5a4fa45 --- /dev/null +++ b/blackmesa/records/handlers.py @@ -0,0 +1,56 @@ + +## Wrapper to handle some sort of record baseclass +class RecordHandler: + """ handler for a record type + + defines how a record can be created, and how to retrieve all field names, and the required ones. + """ + + @classmethod + def wrap(cls, klass): + return cls(klass) + + def __init__(self, klass): + self.klass = klass + + def create(self, **kwargs): + return self.klass(**kwargs) + + def get_field_names(self): + return self.klass.__dict__.keys() + + @property + def record(self): + return self.klass + + @property + def required_arguments(self): + return self.get_field_names() + + +class RecordDictHandler(RecordHandler): + """ RecordHandler that outputs a dictionary """ + + def __init__(self, klass=None): + # it is not required to define dict, but you could do OrderedDict e.g. + self.klass = klass or dict + + def get_field_names(self): + # dictionary has no required fields. + return [] + +class RecordDataclassHandler(RecordHandler): + """ handles dataclasses.dataclass derivatives """ + + def create(self, **kwargs): + # clean field names to be only valid if they are on the dataclass. 
+ record_fields = self.get_field_names() + kwargs = {k: v for k, v in kwargs.items() if k in record_fields} + return self.klass(**kwargs) + + def get_field_names(self): + return list(self.klass.__dataclass_fields__.keys()) + +# @TODO: namedtuple - dynamic generation? +# @TODO: RecordPydanticHandler +# @TODO: RecordAttrsHandler diff --git a/blackmesa/records/readme.md b/blackmesa/records/readme.md new file mode 100644 index 0000000..b491a1f --- /dev/null +++ b/blackmesa/records/readme.md @@ -0,0 +1,66 @@ +# Django Records + +Create arbitrary classes instead of Django models, for use in layered architectures. + + +## Records fetching Data directly into Dataclasses + +##### The records() QuerySet and Manager command + +The idea behind records() is that, instead of transforming a Model instance into a dataclass, you fetch the data from the database with a values()-like call. However, instead of the dictionaries that values() creates, it produces arbitrary classes or dataclasses: "records". It completely skips the instantiation of a Django model instance, and comes with tools that make it easy to handle initialization data, so that you can even automate the production of immutable or nested data structures. + +Out of the box, records() assumes that you want to use `dataclasses.dataclass`. + +Usage: + +```python + SomeModel.objects.filter(...).records(DataClass, 'field1', 'field2', annotation=F(...), adjunct=Adjunct(...)) +``` + +##### How Records works and what it returns + +Just as values() returns an iterable that works with database calls in chunks, records() returns an iterable that works mechanically in exactly the same way, but instead of producing dictionaries, it produces whatever you define as a "record". + +This means that you can also chain things like `.first()` (which will return a single record). + +records() works very similarly to Django's .values(): + + - args are fields that are selected. 
+ - kwargs that are expressions are annotated, with the key used as the field name. + +However, it also differs: + + - The first (and only the first) argument in args can be a target class to produce. See: `RecordHandler`. + - For most handlers, you should not list the fields that are part of your target class. The handler adds them to the values() call automatically. + - kwargs can contain a special kind of class, called an Adjunct, which is *not* included in the SQL, and allows "local data" to be inserted into the class that is produced. See: `Adjunct`. + +##### RecordHandler + +The records produced are created by the `RecordHandler` class. This allows you to adapt the creation of the record to whatever library you want to use. At the moment, there are handlers for a dictionary (`RecordDictHandler`) and for a dataclasses.dataclass (`RecordDataclassHandler`). + +> Todo: Pydantic, attrs, namedtuple. + +By default, records() expects a dataclasses.dataclass, and uses the `RecordDataclassHandler`. + +The whole class is kept simple, so that you can write your own RecordHandler if you work with a different kind of dataclass. + +Instead of providing a record class, you can also provide an instantiated record handler. For example, you could use RecordDictHandler(), and you would get dictionaries from records(). This would essentially be the same output as values(), except that you can use Adjuncts. + +##### Adjunct + +Just as Django expressions can be used to annotate keys onto the data that is retrieved, Adjuncts can be used to bypass this mechanism and insert local data into your target class. You might want to use this if, e.g., the dataclass you create is *immutable*, or the dataclass you use has *required fields* that need data when you create the object, but that data is not part of your database query. + +Various Adjunct mechanics are available: + + - `Adjunct` itself simply carries some data and inserts it into every record, e.g. `.records(data=Adjunct(1))` will always set the field `data` to 1. 
+ - `Lambda` takes a callable as its argument, which gets called to produce the value of the field for each record, e.g. `.records(data=Lambda(lambda entry: 'x' in entry))` + - `Skip` allows you to skip a field. This is needed because records() would otherwise include all fields of a dataclass, without knowing whether a field is optional. + - `Callback` allows you to call a function as a callback at creation - if the callback returns anything other than None, the returned value is used as the initialization data for the object. + +Just as with expressions, you can of course also write your own Adjunct classes by subclassing `BaseAdjunct`. + +##### default_record definition + +If records() is not given a class or handler, you can define a default on either the Manager or the QuerySet by setting `_default_record`. You can also put this on the model class. If no record class is found, a `RuntimeError` is raised. + + diff --git a/blackmesa/records/records.py b/blackmesa/records/records.py index 79dfc35..9b6944f 100644 --- a/blackmesa/records/records.py +++ b/blackmesa/records/records.py @@ -2,146 +2,13 @@ from django.db.models import QuerySet from django.db.models.expressions import BaseExpression, Combinable from django.db.models.query import ValuesIterable from django.db.models.manager import Manager +from .adjuncts import BaseAdjunct +from .handlers import RecordDataclassHandler, RecordHandler -""" - - Base Idea: - - to have a queryset function that easily allows you to build some sort of "record type class" in a queryset. - - make it able to seamlessly use annotation functions - - allow modification of initial values with callbacks, which is needed if your dataclass is frozen. - - By default the RecordManager and RecordQuerySet will use RecordDataclass as handler, expecting your record to be a dataclasses.dataclass type. 
- - An Example: - @dataclass - class MyDataClass - id: int - some_relation: str - next_id: int - - SomeModel.objects.filter(...).records(MyDataClass, some_relation=F('model__relation'), next_id=Lambda(lambda x: x.get('id')+1)) - - This allows you to move an iterator into another layer, where it either can be consumed, or used as an input for another queryset call, - but still guaranteeing, that regular usage of the iterator will not yield any smart object. - It is good if you want to build a best-of-both-worlds approach for subquery-capable repository pattern, which is one of the biggest issues if you want - to keep your business logic out of the repo layer, but still want to utilize djangos queryset mechanics properly. - - records() will take anything values() would take, but additionally it allows: - - to pass the record type as first argument - - to pass Adjunct classes as keyword argument value - -""" - -## Useful for queryset function records() -class BaseAdjunct: - """ - Any Adjunct data which does not translate into SQL, but rather adds data programmatically. - """ - skip = False # if skip is true, this adjunct will not be actually processed. - resolves_field = True # if resolves_field is true, this adjunct will be called for a single field with resolve() - post_processing = False # if post_processing is true, this adjunct will in the end be called with dbdata, and be able to manipulate the whole dictionary. - - def resolve(self, model, dbdata): - raise NotImplementedError - - def post_process(self, model, dbdata): - raise NotImplementedError - -class Adjunct(BaseAdjunct): - """ value function that adds data, without SQL handling. """ - def __init__(self, value=None): - self.value = value - - def resolve(self, model, dbdata): - return self.value - -class Lambda(Adjunct): - """ adjunct value that returns a field value with a callback. 
""" - def __init__(self, callback): - self.callback = callback if callable(callback) else None - - def resolve(self, model, dbdata): - # at this point i could check if callback needs 0-2 arguments and decide the call. - if self.callback: - return self.callback(dbdata) - -class Skip(BaseAdjunct): - """ Skips this key from being retrieved from the database or used in the dataclass instantiation """ - skip = True - resolves_field = False - -class Callback(BaseAdjunct): - """ calls a callback which can modify the whole initialization dictionary. """ - resolves_field = False - post_processing = True - def __init__(self, callback): - self.callback = callback - def post_process(self, model, dbdata): - if self.callback: - return self.callback(dbdata) - -## Wrapper to handle some sort of record baseclass -class RecordHandler: - """ handler for a record type - - defines how a record can be created, and how to retrieve all field names, and the required ones. - """ - - @classmethod - def wrap(cls, klass): - return cls(klass) - - def __init__(self, klass): - self.klass = klass - - def create(self, **kwargs): - return self.klass(**kwargs) - - def get_field_names(self): - return self.klass.__dict__.keys() - - @property - def record(self): - return self.klass - - @property - def required_keys(self): - return self.get_field_names() - - -class RecordDict(RecordHandler): - """ RecordHandler that outputs a dictionary """ - - def __init__(self, klass=None): - # it is not required to define dict, but you could do OrderedDict e.g. - self.klass = klass or dict - - def get_field_names(self): - # dictionary has no required fields. - return [] - - -class RecordDataclass(RecordHandler): - """ handles dataclasses.dataclass derivatives """ - - def create(self, **kwargs): - # clean field names to be only valid if they are on the dataclass. 
- record_fields = self.get_field_names() - kwargs = {k: v for k, v in kwargs.items() if k in record_fields} - return self.klass(**kwargs) - - def get_field_names(self): - return list(self.klass.__dataclass_fields__.keys()) - -# @TODO: RecordPydantic -# @TODO: RecordAttrs - -###### QuerySet Plugin. class RecordIterable(ValuesIterable): """ - Iterable returned by records() that yields a record class for each row. - Replaces the standard iterable of the queryset. + Iterable returned by records() and attached to it's queryset, that yields a record class for each row. """ def __iter__(self): @@ -149,8 +16,11 @@ class RecordIterable(ValuesIterable): model = self.queryset.model query = queryset.query compiler = query.get_compiler(queryset.db) - record_data = getattr(queryset, '_record_extra', {}) - record_handler = queryset._record + record_data = getattr(queryset, '_record_kwargs', {}) + try: + record_handler = queryset._record + except AttributeError: + raise AttributeError("The queryset lacks a _record entry, Is _clone copying that field on the QuerySet class?") # extra(select=...) cols are always at the start of the row. names = [ @@ -179,7 +49,15 @@ class RecordIterable(ValuesIterable): class RecordQuerySetMixin: - _record_handler = RecordDataclass + """ + Actual records() implementation. + As records() calls values(), the queryset is chain-cloned there. + + You can mix this in into your QuerySet and Manager classes. + However make sure in QuerySet classes to implement _clone properly (or use RecordQuerySet instead) + """ + + _record_handler = RecordDataclassHandler def records(self, *args, **kwargs): """ @@ -187,10 +65,10 @@ class RecordQuerySetMixin: Acts like values(), however: - you can pass a record type or RecordHandler as first argument. 
- - if record type is not defined in records(), you have to define it on the queryset, or the model, with _record, + - if record type is not defined in records(), you have to define it on the queryset, or the model, with _default_record, otherwise it will raise a RuntimeError. - keyword arguments of type "Adjunct" are used as deferred values, and resolved independently. - - values() is called with every required_key on the dataclass not handled by an Adjunct + - values() is called with every required_argument on the dataclass not handled by an Adjunct """ if len(args) and not isinstance(args[0], str): # we assume this is our dataclass @@ -199,7 +77,7 @@ class RecordQuerySetMixin: args = args[1:] else: # determine dataclass. - handler = getattr(self, '_record', getattr(self.model, '_record', None)) + handler = getattr(self, '_default_record', getattr(self.model, '_default_record', getattr(self, '_record', None))) if not handler: raise RuntimeError("Trying to records a class without destination class.") @@ -207,38 +85,47 @@ class RecordQuerySetMixin: handler = self._record_handler.wrap(handler) all_keys = [*args, *kwargs.keys()] - unhandled_keys = list(set(handler.required_keys) - set(all_keys)) + unhandled_keys = list(set(handler.required_arguments) - set(all_keys)) args = [*args, *unhandled_keys] + # rebuild keyword arguments for values, by filtering out our adjuncts new_kw = {} - extra = {} + adjuncts = {} for k, v in kwargs.items(): if isinstance(v, BaseAdjunct): + # skip allows an adjunct to completely ignore a key. if not v.skip: - extra[k] = v - elif isinstance(v, BaseExpression) or isinstance(v, Combinable): + adjuncts[k] = v + # check if we have to add to values. adjuncts can define a field to add here. 
+ add_to_values = v.values_field() + if isinstance(add_to_values, str) and add_to_values not in args: + args.append(add_to_values) + elif isinstance(add_to_values, tuple): + new_kw[add_to_values[0]] = add_to_values[1] + elif isinstance(v, BaseExpression) or isinstance(v, Combinable) or hasattr(v, 'resolve_expression'): new_kw[k] = v else: + # this will fail in values() for now, but i do not want to hijack future django functionality here. new_kw[k] = v - # copy ourself with values() and save the results on the cloned queryset. + # copy ourself with values() and save the results on the cloned queryset values produces. values = self.values(*args, **new_kw) values._iterable_class = RecordIterable - values._record_extra = extra + values._record_kwargs = adjuncts values._record = handler return values class RecordQuerySet(RecordQuerySetMixin, QuerySet): - # overwrite cloning. + # overwrite cloning. important. def _clone(self): c = super()._clone() - for key in ['_record', '_record_extra', '_record_handler']: + for key in ['_record', '_record_kwargs', '_record_handler', '_default_record']: if hasattr(self, key): setattr(c, key, getattr(self, key)) return c -# i use a mixin instead for better clarity. records is completely safe, as it does not call _chain. +# i use a mixin instead for better clarity with intellisense systems. records is completely safe, as it does not call _chain. 
# however you can also simply do: #class RecordManager(BaseManager.from_queryset(RecordQuerySet)): # pass diff --git a/blackmesa/records/tests/test_record_queries.py b/blackmesa/records/tests/test_record_queries.py index 970bb74..a5fd94a 100644 --- a/blackmesa/records/tests/test_record_queries.py +++ b/blackmesa/records/tests/test_record_queries.py @@ -3,11 +3,11 @@ from unittest.case import skipIf from django.db.models import F from django.test.testcases import TestCase +from django.test.utils import tag import mock -from ..records import Lambda, Adjunct, Callback -from django.test.utils import tag -from click.types import INT +from ..adjuncts import Lambda, Adjunct, Callback +from ..handlers import RecordDictHandler try: @@ -22,6 +22,7 @@ except RuntimeError: class Entity: id: int + @dataclass class SpaceRock: id: int @@ -29,6 +30,7 @@ class SpaceRock: orbits_name: str is_moon: bool + @tag('library') @skipIf(not celestials_installed, "Celestials Testpackage not installed into INSTALLED_APPS.") class TestQueryBuilder(TestCase): @@ -46,11 +48,16 @@ class TestQueryBuilder(TestCase): # find moons. test whether i can use entities to do an SQL query. works because i have only one key. self.assertEqual(len(self.moons), Celestial.objects.filter(orbits__in=entities).count()) - + # this is pretty much the same as self.assertEqual(len(self.moons), len(Celestial.objects.filter( orbits__in=Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4)).values_list('id', flat=True))) + def test_handler_dict(self): + entities = Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4).records(RecordDictHandler()) + self.assertEqual(len(entities), len(self.planets)) + self.assertIsInstance(entities.first(), dict) + def test_lambda(self): # this tests whether our own celestial type or the celestial type of what we orbit is correct for being a moon. parameter is a dictionary. 
@@ -82,7 +89,7 @@ class TestQueryBuilder(TestCase): self.assertEqual(len(entities), len(self.celestials)) self.assertEqual(callback_one.call_count, len(self.celestials)) self.assertEqual(callback_two.call_count, len(self.celestials)) - + def test_double_value_technique(self): """ Records open a new sort of technique for late calling details from dataclasses. @@ -92,28 +99,30 @@ class TestQueryBuilder(TestCase): you could e.g. use it in a subquery, while still access the data. however this will hit the database twice if you evaluate the iterator yourself, as the lambda is not lazy. + + note this is mainly a thought experiment. """ - + planet_queryset = Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4) - + @dataclass class DetailedEntity: id: int name: str - + @dataclass class IndexEntity: id: int detail: DetailedEntity - + def get_details_exec(data): return Celestial.objects.filter(pk=data.get('id')).records(DetailedEntity).first() - + get_details = mock.Mock(side_effect=get_details_exec) - + # retrieves data per key only. my_planets = planet_queryset.records(IndexEntity, detail=Lambda(get_details)) - my_moons = Celestial.objects.filter(orbits__in=my_planets).records(IndexEntity, detail=Lambda(get_details)) # legal + my_moons = Celestial.objects.filter(orbits__in=my_planets).records(IndexEntity, detail=Lambda(get_details)) # legal # django does not consume the iterator internally for subqueries: self.assertEqual(get_details.call_count, 0) # consume it ourselves... @@ -123,4 +132,3 @@ class TestQueryBuilder(TestCase): # but... self.assertEqual(len(my_moons), len(self.moons)) self.assertEqual(get_details.call_count, len(self.planets) + len(self.moons)) - \ No newline at end of file