making django-records more release worthy

This commit is contained in:
Gabor Körber 2021-06-02 10:24:33 +02:00
parent e19c5fc38e
commit bed4ae6bbd
5 changed files with 244 additions and 164 deletions

View File

@ -0,0 +1,63 @@
class BaseAdjunct:
    """
    Root class of all adjuncts for the records() call.

    An adjunct is data that is not translated into SQL; it contributes data
    programmatically instead. The three class flags below describe how an
    adjunct takes part in processing.
    """

    # if skip is true, this adjunct will not be actually processed.
    skip = False
    # if resolves_field is true, resolve() is called to produce the value of a single field.
    resolves_field = True
    # if post_processing is true, post_process() is called in the end with dbdata
    # and is able to manipulate the whole dictionary.
    post_processing = False

    def values_field(self):
        """
        Name a field to hand to the values() operator.

        A returned string is added to values as args, a returned tuple is
        added to values as kwargs. This base implementation adds nothing.
        """
        return None

    def resolve(self, model, dbdata):
        """ Produce the field value for one entry; subclasses resolving a field implement this. """
        raise NotImplementedError()

    def post_process(self, model, dbdata):
        """
        Needs to be implemented when post_processing is True.

        Has to return either a new dictionary to use in the initialization of
        an object, or None.
        """
        raise NotImplementedError()
class Adjunct(BaseAdjunct):
    """ Adjunct that carries one fixed value and adds it as data, without SQL handling. """

    def __init__(self, value=None):
        # the payload returned by resolve(); defaults to None.
        self.value = value

    def resolve(self, model, dbdata):
        """ Return the stored value; neither model nor dbdata is consulted. """
        stored = self.value
        return stored
class Lambda(Adjunct):
    """
    Adjunct value that returns a field value with a callback.

    The callback receives the dbdata dictionary of the entry and its return
    value becomes the field value. A non-callable argument is tolerated: it is
    stored as None and resolve() then yields None.
    """

    def __init__(self, callback):
        # Run the Adjunct initializer so the inherited `value` attribute exists
        # (as None) instead of raising AttributeError when accessed.
        super().__init__()
        self.callback = callback if callable(callback) else None

    def resolve(self, model, dbdata):
        """ Call the callback with dbdata and return its result, or None without a callback. """
        # at this point i could check if callback needs 0-2 arguments and decide the call.
        # Compare against None rather than relying on truthiness, so a callable
        # object that happens to be falsy (e.g. defines __len__) is still called.
        if self.callback is None:
            return None
        return self.callback(dbdata)
class Skip(BaseAdjunct):
    """
    Marker adjunct: the key it is assigned to is skipped, i.e. neither retrieved
    from the database nor used in the dataclass instantiation.
    """

    # no single field value is resolved for a skipped key.
    resolves_field = False
    # flags this adjunct as one that is not processed.
    skip = True
class Callback(BaseAdjunct):
    """ Adjunct invoking a callback which can modify the whole initialization dictionary. """

    # takes part in post processing only, it does not resolve a single field.
    post_processing = True
    resolves_field = False

    def __init__(self, callback):
        self.callback = callback

    def post_process(self, model, dbdata):
        """ Hand dbdata to the callback and return its result; None when no callback is set. """
        hook = self.callback
        if not hook:
            return None
        return hook(dbdata)

View File

@ -0,0 +1,56 @@
## Wrapper to handle some sort of record baseclass
class RecordHandler:
    """ handler for a record type

    defines how a record can be created, and how to retrieve all field names, and the required ones.
    """

    def __init__(self, klass):
        # the record class that create() instantiates.
        self.klass = klass

    @classmethod
    def wrap(cls, klass):
        """ Alternate constructor: return a handler of this type around klass. """
        return cls(klass)

    def create(self, **kwargs):
        """ Instantiate the record class with the given keyword arguments. """
        return self.klass(**kwargs)

    def get_field_names(self):
        """
        Return the names of the data attributes defined directly on the record class.

        The raw class namespace also contains interpreter bookkeeping
        (`__module__`, `__dict__`, `__weakref__`, `__doc__`, ...) as well as
        methods and properties. None of these are record fields, so they are
        filtered out instead of being reported as field names.
        """
        from inspect import isroutine

        return [
            name
            for name, value in vars(self.klass).items()
            if not (name.startswith('__') and name.endswith('__'))
            and not isroutine(value)
            and not isinstance(value, property)
        ]

    @property
    def record(self):
        """ The wrapped record class. """
        return self.klass

    @property
    def required_arguments(self):
        """ Field names required to build a record; by default all field names. """
        return self.get_field_names()
class RecordDictHandler(RecordHandler):
    """ RecordHandler whose records are dictionaries. """

    def __init__(self, klass=None):
        # passing a class is optional; dict is the fallback, but a different
        # mapping type such as OrderedDict could be given instead.
        self.klass = klass if klass else dict

    def get_field_names(self):
        """ A dictionary has no required fields, so no names are reported. """
        return list()
class RecordDataclassHandler(RecordHandler):
    """ Handler for record classes derived from dataclasses.dataclass. """

    def get_field_names(self):
        """ List the names found in the dataclass' `__dataclass_fields__` mapping. """
        return list(self.klass.__dataclass_fields__)

    def create(self, **kwargs):
        """ Build the dataclass, dropping every keyword that is not one of its field names. """
        allowed = self.get_field_names()
        accepted = {}
        for key, value in kwargs.items():
            # only names that exist on the dataclass are valid initializers.
            if key in allowed:
                accepted[key] = value
        return self.klass(**accepted)
# @TODO: namedtuple - dynamic generation?
# @TODO: RecordPydanticHandler
# @TODO: RecordAttrsHandler

View File

@ -0,0 +1,66 @@
# Django Records
Create arbitrary classes instead of django models for usage in layered architectures.
## Records fetching Data directly into Dataclasses
##### The records() QuerySet and Manager command
The idea behind records() is, instead of directly transforming a Model into a dataclass, to fetch data from the database with a values()-like call. However, instead of the dictionaries that values() creates, it produces arbitrary classes or dataclasses, i.e. "records". It completely skips instantiation of a Django model instance, and comes with tools that make it easy to handle initialization data, so that you can even automate the production of immutable or nested data structures.
Out of the box, records() assumes that you want to use `dataclasses.dataclass`.
Usage:
```python
SomeModel.objects.filter(...).records(DataClass, 'field1', 'field2', annotation=F(...), adjunct=Adjunct(...))
```
##### How Records works and what it returns
Just like values() returns an iterator that works with database calls in chunks, records() returns an iterator that works exactly the same way mechanically, but instead of producing dictionaries, it produces whatever you define as a "record".
This means that you can also chain things like `.first()` (which will return a single record).
records() works very similarly to Django's .values():
- args are fields that are selected.
- kwargs that are expressions are annotated into the key as field
However it also differs:
- The first (and only first) argument in args can be a target class to produce. See: `RecordHandler`.
- For most handlers, you should not list the fields that are part of your target class. The handler adds them to the values() call automatically.
- kwargs can contain a special kind of class, called an Adjunct, which is *not* included in the SQL, and allows "local data" to be inserted into the resulting class produced. See: `Adjunct`.
##### RecordHandler
The dataclasses produced are handled by the `RecordHandler` class. This allows you to adapt the creation of the dataclass to whatever library you want to use. At the moment, there are example handlers for a dictionary (`RecordDictHandler`) and for a dataclasses.dataclass (`RecordDataclassHandler`).
> Todo: Pydantic, attrs, namedtuple.
By Default, it expects dataclasses.dataclass, and uses the `RecordDataclassHandler`.
The whole class is kept simple, so that you can write your own RecordHandler if you work with a different kind of dataclass.
Instead of providing a record class, you can also provide an instantiated record handler. For example, you could use RecordDictHandler(), and you would get dictionaries from records(), which would essentially be the same output as values(), except that you can use Adjuncts.
##### Adjunct
Just like Django Expressions can be used to annotate keys in the model that are retrieved, so can Adjuncts be used to circumvent this mechanic, and insert local data into your target class. You might want to use this, if e.g. the dataclass you create is *immutable*, or the dataclass you use has *required fields*, that need data when you create the class, but the data is not part of your database query.
Various Adjunct mechanics are available:
- `Adjunct` itself simply carries some data and inserts it into every model. e.g. `.records(data=Adjunct(1))` will set the field `data` always to 1.
- `Lambda` allows to use a callable as argument, which gets called when setting the field on the model. e.g. `.records(data=Lambda(lambda entry: 'x' in entry))`
- `Skip` allows you to skip a field. This is needed, as records() would include all fields on a dataclass, without knowing if it is optional.
- `Callback` allows you to call a function as a callback at creation - if the callback returns anything other than None, it is used as the initializer for the production of the object.
Just as with expressions, you can of course also write your own Adjunct classes, by subclassing `BaseAdjunct`.
##### default_record definition
If records() is not provided with a class or handler, you can define a default either on the Manager or QuerySet, by using `_default_record`. You can also put this on the model class. If no record class is found, a RuntimeError is raised.

View File

@ -2,146 +2,13 @@ from django.db.models import QuerySet
from django.db.models.expressions import BaseExpression, Combinable
from django.db.models.query import ValuesIterable
from django.db.models.manager import Manager
from .adjuncts import BaseAdjunct
from .handlers import RecordDataclassHandler, RecordHandler
"""
Base Idea:
- to have a queryset function that easily allows you to build some sort of "record type class" in a queryset.
- make it able to seamlessly use annotation functions
- allow modification of initial values with callbacks, which is needed if your dataclass is frozen.
By default the RecordManager and RecordQuerySet will use RecordDataclass as handler, expecting your record to be a dataclasses.dataclass type.
An Example:
@dataclass
class MyDataClass
id: int
some_relation: str
next_id: int
SomeModel.objects.filter(...).records(MyDataClass, some_relation=F('model__relation'), next_id=Lambda(lambda x: x.get('id')+1))
This allows you to move an iterator into another layer, where it either can be consumed, or used as an input for another queryset call,
but still guaranteeing, that regular usage of the iterator will not yield any smart object.
It is good if you want to build a best-of-both-worlds approach for subquery-capable repository pattern, which is one of the biggest issues if you want
to keep your business logic out of the repo layer, but still want to utilize djangos queryset mechanics properly.
records() will take anything values() would take, but additionally it allows:
- to pass the record type as first argument
- to pass Adjunct classes as keyword argument value
"""
## Useful for queryset function records()
class BaseAdjunct:
"""
Any Adjunct data which does not translate into SQL, but rather adds data programmatically.
"""
skip = False # if skip is true, this adjunct will not be actually processed.
resolves_field = True # if resolves_field is true, this adjunct will be called for a single field with resolve()
post_processing = False # if post_processing is true, this adjunct will in the end be called with dbdata, and be able to manipulate the whole dictionary.
def resolve(self, model, dbdata):
raise NotImplementedError
def post_process(self, model, dbdata):
raise NotImplementedError
class Adjunct(BaseAdjunct):
""" value function that adds data, without SQL handling. """
def __init__(self, value=None):
self.value = value
def resolve(self, model, dbdata):
return self.value
class Lambda(Adjunct):
""" adjunct value that returns a field value with a callback. """
def __init__(self, callback):
self.callback = callback if callable(callback) else None
def resolve(self, model, dbdata):
# at this point i could check if callback needs 0-2 arguments and decide the call.
if self.callback:
return self.callback(dbdata)
class Skip(BaseAdjunct):
""" Skips this key from being retrieved from the database or used in the dataclass instantiation """
skip = True
resolves_field = False
class Callback(BaseAdjunct):
""" calls a callback which can modify the whole initialization dictionary. """
resolves_field = False
post_processing = True
def __init__(self, callback):
self.callback = callback
def post_process(self, model, dbdata):
if self.callback:
return self.callback(dbdata)
## Wrapper to handle some sort of record baseclass
class RecordHandler:
""" handler for a record type
defines how a record can be created, and how to retrieve all field names, and the required ones.
"""
@classmethod
def wrap(cls, klass):
return cls(klass)
def __init__(self, klass):
self.klass = klass
def create(self, **kwargs):
return self.klass(**kwargs)
def get_field_names(self):
return self.klass.__dict__.keys()
@property
def record(self):
return self.klass
@property
def required_keys(self):
return self.get_field_names()
class RecordDict(RecordHandler):
""" RecordHandler that outputs a dictionary """
def __init__(self, klass=None):
# it is not required to define dict, but you could do OrderedDict e.g.
self.klass = klass or dict
def get_field_names(self):
# dictionary has no required fields.
return []
class RecordDataclass(RecordHandler):
""" handles dataclasses.dataclass derivatives """
def create(self, **kwargs):
# clean field names to be only valid if they are on the dataclass.
record_fields = self.get_field_names()
kwargs = {k: v for k, v in kwargs.items() if k in record_fields}
return self.klass(**kwargs)
def get_field_names(self):
return list(self.klass.__dataclass_fields__.keys())
# @TODO: RecordPydantic
# @TODO: RecordAttrs
###### QuerySet Plugin.
class RecordIterable(ValuesIterable):
"""
Iterable returned by records() that yields a record class for each row.
Replaces the standard iterable of the queryset.
Iterable returned by records() and attached to it's queryset, that yields a record class for each row.
"""
def __iter__(self):
@ -149,8 +16,11 @@ class RecordIterable(ValuesIterable):
model = self.queryset.model
query = queryset.query
compiler = query.get_compiler(queryset.db)
record_data = getattr(queryset, '_record_extra', {})
record_data = getattr(queryset, '_record_kwargs', {})
try:
record_handler = queryset._record
except AttributeError:
raise AttributeError("The queryset lacks a _record entry, Is _clone copying that field on the QuerySet class?")
# extra(select=...) cols are always at the start of the row.
names = [
@ -179,7 +49,15 @@ class RecordIterable(ValuesIterable):
class RecordQuerySetMixin:
_record_handler = RecordDataclass
"""
Actual records() implementation.
As records() calls values(), the queryset is chain-cloned there.
You can mix this in into your QuerySet and Manager classes.
However make sure in QuerySet classes to implement _clone properly (or use RecordQuerySet instead)
"""
_record_handler = RecordDataclassHandler
def records(self, *args, **kwargs):
"""
@ -187,10 +65,10 @@ class RecordQuerySetMixin:
Acts like values(), however:
- you can pass a record type or RecordHandler as first argument.
- if record type is not defined in records(), you have to define it on the queryset, or the model, with _record,
- if record type is not defined in records(), you have to define it on the queryset, or the model, with _default_record,
otherwise it will raise a RuntimeError.
- keyword arguments of type "Adjunct" are used as deferred values, and resolved independently.
- values() is called with every required_key on the dataclass not handled by an Adjunct
- values() is called with every required_argument on the dataclass not handled by an Adjunct
"""
if len(args) and not isinstance(args[0], str):
# we assume this is our dataclass
@ -199,7 +77,7 @@ class RecordQuerySetMixin:
args = args[1:]
else:
# determine dataclass.
handler = getattr(self, '_record', getattr(self.model, '_record', None))
handler = getattr(self, '_default_record', getattr(self.model, '_default_record', getattr(self, '_record', None)))
if not handler:
raise RuntimeError("Trying to records a class without destination class.")
@ -207,38 +85,47 @@ class RecordQuerySetMixin:
handler = self._record_handler.wrap(handler)
all_keys = [*args, *kwargs.keys()]
unhandled_keys = list(set(handler.required_keys) - set(all_keys))
unhandled_keys = list(set(handler.required_arguments) - set(all_keys))
args = [*args, *unhandled_keys]
# rebuild keyword arguments for values, by filtering out our adjuncts
new_kw = {}
extra = {}
adjuncts = {}
for k, v in kwargs.items():
if isinstance(v, BaseAdjunct):
# skip allows an adjunct to completely ignore a key.
if not v.skip:
extra[k] = v
elif isinstance(v, BaseExpression) or isinstance(v, Combinable):
adjuncts[k] = v
# check if we have to add to values. adjuncts can define a field to add here.
add_to_values = v.values_field()
if isinstance(add_to_values, str) and add_to_values not in args:
args.append(add_to_values)
elif isinstance(add_to_values, tuple):
new_kw[add_to_values[0]] = add_to_values[1]
elif isinstance(v, BaseExpression) or isinstance(v, Combinable) or hasattr(v, 'resolve_expression'):
new_kw[k] = v
else:
# this will fail in values() for now, but i do not want to hijack future django functionality here.
new_kw[k] = v
# copy ourself with values() and save the results on the cloned queryset.
# copy ourself with values() and save the results on the cloned queryset values produces.
values = self.values(*args, **new_kw)
values._iterable_class = RecordIterable
values._record_extra = extra
values._record_kwargs = adjuncts
values._record = handler
return values
class RecordQuerySet(RecordQuerySetMixin, QuerySet):
# overwrite cloning.
# overwrite cloning. important.
def _clone(self):
c = super()._clone()
for key in ['_record', '_record_extra', '_record_handler']:
for key in ['_record', '_record_kwargs', '_record_handler', '_default_record']:
if hasattr(self, key):
setattr(c, key, getattr(self, key))
return c
# i use a mixin instead for better clarity. records is completely safe, as it does not call _chain.
# i use a mixin instead for better clarity with intellisense systems. records is completely safe, as it does not call _chain.
# however you can also simply do:
#class RecordManager(BaseManager.from_queryset(RecordQuerySet)):
# pass

View File

@ -3,11 +3,11 @@ from unittest.case import skipIf
from django.db.models import F
from django.test.testcases import TestCase
from django.test.utils import tag
import mock
from ..records import Lambda, Adjunct, Callback
from django.test.utils import tag
from click.types import INT
from ..adjuncts import Lambda, Adjunct, Callback
from ..handlers import RecordDictHandler
try:
@ -22,6 +22,7 @@ except RuntimeError:
class Entity:
id: int
@dataclass
class SpaceRock:
id: int
@ -29,6 +30,7 @@ class SpaceRock:
orbits_name: str
is_moon: bool
@tag('library')
@skipIf(not celestials_installed, "Celestials Testpackage not installed into INSTALLED_APPS.")
class TestQueryBuilder(TestCase):
@ -51,6 +53,11 @@ class TestQueryBuilder(TestCase):
self.assertEqual(len(self.moons), len(Celestial.objects.filter(
orbits__in=Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4)).values_list('id', flat=True)))
def test_handler_dict(self):
entities = Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4).records(RecordDictHandler())
self.assertEqual(len(entities), len(self.planets))
self.assertIsInstance(entities.first(), dict)
def test_lambda(self):
# this tests whether our own celestial type or the celestial type of what we orbit is correct for being a moon. parameter is a dictionary.
@ -92,6 +99,8 @@ class TestQueryBuilder(TestCase):
you could e.g. use it in a subquery, while still access the data.
however this will hit the database twice if you evaluate the iterator yourself, as the lambda is not lazy.
note this is mainly a thought experiment.
"""
planet_queryset = Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4)
@ -123,4 +132,3 @@ class TestQueryBuilder(TestCase):
# but...
self.assertEqual(len(my_moons), len(self.moons))
self.assertEqual(get_details.call_count, len(self.planets) + len(self.moons))