work from weekend

2021-05-25 18:07:16 +02:00
parent c1fde84ef7
commit 006ab18a19
28 changed files with 1096 additions and 0 deletions
--- a/blackmesa/records/init.py
+++ b/blackmesa/records/init.py
--- a/blackmesa/records/records.py
+++ b/blackmesa/records/records.py
@@ -0,0 +1,249 @@
+from django.db.models import QuerySet
+from django.db.models.expressions import BaseExpression, Combinable
+from django.db.models.query import ValuesIterable
+from django.db.models.manager import Manager
+
+"""
+
+    Base Idea:
+        - to have a queryset function that easily allows you to build some sort of "record type class" in a queryset.
+        - make it able to seamlessly use annotation functions
+        - allow modification of initial values with callbacks, which is needed if your dataclass is frozen.
+    
+    By default the RecordManager and RecordQuerySet will use RecordDataclass as handler, expecting your record to be a dataclasses.dataclass type.
+    
+    An Example:
+    @dataclass
+    class MyDataClass:
+        id: int
+        some_relation: str
+        next_id: int
+    
+    SomeModel.objects.filter(...).records(MyDataClass, some_relation=F('model__relation'), next_id=Lambda(lambda x: x.get('id')+1))
+    
+    This allows you to pass an iterator into another layer, where it can either be consumed or used as input for another queryset call,
+    while still guaranteeing that regular usage of the iterator will not yield any smart object.
+    It is useful if you want to build a best-of-both-worlds approach for a subquery-capable repository pattern, which is one of the biggest issues if you want
+    to keep your business logic out of the repo layer, but still want to utilize Django's queryset mechanics properly.
+    
+    records() will take anything values() would take, but additionally it allows:
+        - to pass the record type as first argument
+        - to pass Adjunct classes as keyword argument value
+    
+"""
+
+## Useful for queryset function records()
+class BaseAdjunct:
+    """
+        Base class for record values that are produced in Python instead of being translated into SQL.
+
+        The three class-level flags tell the record machinery how an instance has to be treated.
+    """
+    # post_processing: when true, post_process() is called at the end with the whole row dictionary.
+    post_processing = False
+    # resolves_field: when true, resolve() is called to produce the value of a single field.
+    resolves_field = True
+    # skip: when true, the adjunct is not processed at all.
+    skip = False
+
+    def resolve(self, model, dbdata):
+        """ Produce the value of one field; has to be overridden by subclasses. """
+        raise NotImplementedError()
+
+    def post_process(self, model, dbdata):
+        """ Rework the whole row dictionary; has to be overridden by subclasses. """
+        raise NotImplementedError()
+
+class Adjunct(BaseAdjunct):
+    """ Adjunct holding a fixed value that is put into the record without any SQL handling. """
+
+    def __init__(self, value=None):
+        # the constant handed out by resolve(); defaults to None.
+        self.value = value
+
+    def resolve(self, model, dbdata):
+        """ Return the stored constant; model and dbdata are not consulted. """
+        constant = self.value
+        return constant
+
+class Lambda(Adjunct):
+    """
+        Adjunct whose field value is computed per row by a callback.
+
+        The callback receives the row dictionary (dbdata); whatever it returns becomes the field value.
+        A non-callable argument is tolerated: it is stored as None and the field then resolves to None.
+    """
+    def __init__(self, callback):
+        # initialise the inherited ``value`` attribute so it exists on Lambda instances as well.
+        super().__init__()
+        self.callback = callback if callable(callback) else None
+
+    def resolve(self, model, dbdata):
+        """ Return callback(dbdata), or None when no usable callback was given. """
+        # at this point i could check if callback needs 0-2 arguments and decide the call.
+        # compare against None explicitly: a callable object may itself evaluate as falsy.
+        if self.callback is None:
+            return None
+        return self.callback(dbdata)
+        
+class Skip(BaseAdjunct):
+    """ Marks a key as left out: it is neither retrieved from the database nor used in the dataclass instantiation. """
+    # never asked for a field value ...
+    resolves_field = False
+    # ... and flagged so that the adjunct is not processed at all.
+    skip = True
+
+class Callback(BaseAdjunct):
+    """ Post-processing adjunct: its callback receives, and may modify, the whole initialization dictionary. """
+    post_processing = True
+    resolves_field = False
+
+    def __init__(self, callback):
+        self.callback = callback
+
+    def post_process(self, model, dbdata):
+        """ Run the callback on dbdata and return its result; return None when no callback is set. """
+        if not self.callback:
+            return None
+        return self.callback(dbdata)
+
+## Wrapper to handle some sort of record baseclass
+class RecordHandler:
+    """ handler for a record type
+
+        defines how a record can be created, and how to retrieve all field names, and the required ones.
+    """
+
+    @classmethod
+    def wrap(cls, klass):
+        """ Alternate constructor: return a handler of this type around klass. """
+        return cls(klass)
+
+    def __init__(self, klass):
+        self.klass = klass
+
+    def create(self, **kwargs):
+        """ Instantiate the record type with the given keyword arguments. """
+        return self.klass(**kwargs)
+
+    def get_field_names(self):
+        """
+            Return the names defined directly on the record class, as a list.
+
+            Double-underscore entries (__module__, __dict__, __doc__, ...) are part of every class
+            namespace and are never record fields, so they are left out.
+        """
+        return [
+            name for name in vars(self.klass)
+            if not (name.startswith('__') and name.endswith('__'))
+        ]
+
+    @property
+    def record(self):
+        """ The wrapped record type. """
+        return self.klass
+
+    @property
+    def required_keys(self):
+        """ Keys a record needs; by default every field name. """
+        return self.get_field_names()
+
+
+class RecordDict(RecordHandler):
+    """ RecordHandler producing dictionaries instead of record objects """
+
+    def __init__(self, klass=None):
+        # another mapping type may be supplied (OrderedDict e.g.); a plain dict is the fallback.
+        if klass:
+            self.klass = klass
+        else:
+            self.klass = dict
+
+    def get_field_names(self):
+        # nothing is mandatory for a dictionary.
+        return list()
+
+
+class RecordDataclass(RecordHandler):
+    """ RecordHandler for classes created with dataclasses.dataclass """
+
+    def create(self, **kwargs):
+        """ Build the dataclass, dropping every keyword that is not one of its fields. """
+        allowed = self.get_field_names()
+        accepted = {}
+        for key, value in kwargs.items():
+            if key in allowed:
+                accepted[key] = value
+        return self.klass(**accepted)
+
+    def get_field_names(self):
+        """ List the names found in the dataclass' __dataclass_fields__ mapping. """
+        return [*self.klass.__dataclass_fields__]
+
+# @TODO: RecordPydantic
+# @TODO: RecordAttrs
+
+###### QuerySet Plugin.
+
+class RecordIterable(ValuesIterable):
+    """
+    Iterable installed by records(): turns every database row into a record object.
+    Takes the place of the queryset's standard iterable.
+    """
+
+    def __iter__(self):
+        qs = self.queryset
+        model = qs.model
+        query = qs.query
+        compiler = query.get_compiler(qs.db)
+        adjuncts = getattr(qs, '_record_extra', {})
+        handler = qs._record
+
+        # column names in row order; extra(select=...) cols are always at the start of the row.
+        names = [*query.extra_select, *query.values_select, *query.annotation_select]
+        positions = range(len(names))
+
+        rows = compiler.results_iter(chunked_fetch=self.chunked_fetch, chunk_size=self.chunk_size)
+        for row in rows:
+            dbdata = {names[pos]: row[pos] for pos in positions}
+            # adjuncts that rework the whole dictionary are collected first and run afterwards.
+            deferred = []
+            # field adjuncts bluntly overwrite whatever the database delivered for their key.
+            for key, adjunct in adjuncts.items():
+                if adjunct.resolves_field:
+                    dbdata[key] = adjunct.resolve(model, dbdata)
+                if adjunct.post_processing:
+                    deferred.append(adjunct)
+            for adjunct in deferred:
+                replacement = adjunct.post_process(model, dbdata)
+                # a post-processor returning None leaves the dictionary as it is.
+                if replacement is not None:
+                    dbdata = replacement
+            yield handler.create(**dbdata)
+
+
+class RecordQuerySetMixin:
+    # handler class used to wrap a plain record type that is not already a RecordHandler.
+    _record_handler = RecordDataclass
+
+    def records(self, *args, **kwargs):
+        """
+            generates record objects
+
+            Acts like values(), however:
+                - you can pass a record type or RecordHandler as first argument.
+                - if record type is not defined in records(), you have to define it on the queryset, or the model, with _record,
+                  otherwise it will raise a RuntimeError.
+                - keyword arguments of type "Adjunct" are used as deferred values, and resolved independently.
+                - values() is called with every required_key on the dataclass not handled by an Adjunct
+
+            Returns the values() clone, switched to RecordIterable and carrying the handler and the adjuncts.
+        """
+        if args and not isinstance(args[0], str):
+            # we assume this is our dataclass
+            # @TODO better checks.
+            handler, *args = args
+        else:
+            # determine dataclass.
+            handler = getattr(self, '_record', getattr(self.model, '_record', None))
+            if not handler:
+                raise RuntimeError("Trying to records a class without destination class.")
+
+        if not isinstance(handler, RecordHandler):
+            handler = self._record_handler.wrap(handler)
+
+        # required keys the caller did not mention are appended in the handler's own order (duplicates removed),
+        # so the column list does not depend on set iteration order.
+        handled = {*args, *kwargs}
+        unhandled_keys = [k for k in dict.fromkeys(handler.required_keys) if k not in handled]
+        args = [*args, *unhandled_keys]
+
+        # adjuncts are kept aside for the iterable; every other keyword is passed on to values() unchanged.
+        new_kw = {}
+        extra = {}
+        for k, v in kwargs.items():
+            if isinstance(v, BaseAdjunct):
+                # adjuncts flagged with skip are dropped altogether.
+                if not v.skip:
+                    extra[k] = v
+            else:
+                new_kw[k] = v
+
+        # copy ourself with values() and save the results on the cloned queryset.
+        values = self.values(*args, **new_kw)
+        values._iterable_class = RecordIterable
+        values._record_extra = extra
+        values._record = handler
+        return values
+
+
+class RecordQuerySet(RecordQuerySetMixin, QuerySet):
+    # cloning is overridden so the record attributes are carried over to the clone.
+    def _clone(self):
+        """ Clone as usual, then copy those record attributes that exist on this queryset. """
+        clone = super()._clone()
+        present = [name for name in ('_record', '_record_extra', '_record_handler') if hasattr(self, name)]
+        for name in present:
+            setattr(clone, name, getattr(self, name))
+        return clone
+
+# i use a mixin instead for better clarity. records is completely safe, as it does not call _chain.
+# however you can also simply do:
+#class RecordManager(BaseManager.from_queryset(RecordQuerySet)):
+#    pass
+
+
+class RecordManager(RecordQuerySetMixin, Manager):
+    """ Manager that offers records() itself and hands out RecordQuerySet instances. """
+
+    def get_queryset(self):
+        # pass the manager's hints along as the stock Manager.get_queryset() does, instead of dropping them.
+        return RecordQuerySet(model=self.model, using=self._db, hints=self._hints)
--- a/blackmesa/records/tests/init.py
+++ b/blackmesa/records/tests/init.py
--- a/blackmesa/records/tests/celestials/init.py
+++ b/blackmesa/records/tests/celestials/init.py
--- a/blackmesa/records/tests/celestials/app.py
+++ b/blackmesa/records/tests/celestials/app.py
@@ -0,0 +1,7 @@
+from django.apps import AppConfig
+
+# Only include tests in your INSTALLED_APPS if you want to test against django models.
+
+class CommonTestsConfig(AppConfig):
+    """ App configuration for the celestials test package. """
+    label = 'celestials_tests'
+    name = 'tests.celestials'
--- a/blackmesa/records/tests/celestials/factories.py
+++ b/blackmesa/records/tests/celestials/factories.py
@@ -0,0 +1,47 @@
+from . import models
+import factory
+import factory.fuzzy
+
+
+class CelestialFactory(factory.django.DjangoModelFactory):
+    # orbits: for a celestial_type above 1 a parent of the next lower type is created through this same factory;
+    # otherwise (type 1, 0 or None) the celestial orbits nothing.
+    orbits = factory.LazyAttribute(lambda c: CelestialFactory(celestial_type=c.celestial_type - 1) if c.celestial_type and c.celestial_type > 1 else None)
+    # name: a fake city name.
+    name = factory.Faker('city')
+    # 1 sun, 7 planets, 3 moons, 4 asteroids, 5 stations
+    celestial_type = factory.Iterator([1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5])
+    # weight and size: random floats within the given bounds.
+    weight = factory.fuzzy.FuzzyFloat(100.0, 100000.0)
+    size = factory.fuzzy.FuzzyFloat(1.0, 8.0)
+    
+    class Meta:
+        model = models.Celestial
+
+# Person with fake first and last name, a random age between 9 and 79 and a newly created origin celestial.
+# Uses the same base class path (factory.django.DjangoModelFactory) as CelestialFactory.
+class PersonFactory(factory.django.DjangoModelFactory):
+    origin = factory.SubFactory(CelestialFactory)
+    first_name = factory.Faker('first_name')
+    last_name = factory.Faker('last_name')
+    age = factory.fuzzy.FuzzyInteger(9, 79)
+    
+    class Meta:
+        model = models.Person
+
+# Spaceport on a newly created celestial whose type is taken from the iterator; the name is derived from that celestial.
+# Uses the same base class path (factory.django.DjangoModelFactory) as CelestialFactory.
+class SpaceportFactory(factory.django.DjangoModelFactory):
+    name = factory.LazyAttribute(lambda sp: f'Port {sp.celestial.name}')
+    celestial = factory.SubFactory(CelestialFactory, celestial_type=factory.Iterator([2,2,3,4,5]))
+    
+    class Meta:
+        model = models.Spaceport
+
+# Visitor linking a newly created person and spaceport, with a random luggage weight between 1.0 and 100.0.
+# Uses the same base class path (factory.django.DjangoModelFactory) as CelestialFactory.
+class VisitorFactory(factory.django.DjangoModelFactory):
+    person = factory.SubFactory(PersonFactory)
+    spaceport = factory.SubFactory(SpaceportFactory)
+    luggage_weight = factory.fuzzy.FuzzyFloat(1.0, 100.0)
+    
+    class Meta:
+        model = models.Visitor
+
+# Citizen of a newly created planet (celestial_type 2) with a random clearance level between 0 and 4.
+# Uses the same base class path (factory.django.DjangoModelFactory) as CelestialFactory.
+class CitizenFactory(factory.django.DjangoModelFactory):
+    planet = factory.SubFactory(CelestialFactory, celestial_type=2)
+    # '..planet' climbs from the person being built to this citizen, so the person originates from the citizen's planet;
+    # a bare 'planet' would be looked up on the person itself.
+    person = factory.SubFactory(PersonFactory, origin=factory.SelfAttribute('..planet'))
+    clearance_level = factory.fuzzy.FuzzyInteger(0, 4)
+    class Meta:
+        model = models.Citizen
+
--- a/blackmesa/records/tests/celestials/galaxy.py
+++ b/blackmesa/records/tests/celestials/galaxy.py
@@ -0,0 +1,47 @@
+from .factories import CelestialFactory, SpaceportFactory
+
+
+class Stars:
+    """ Helper that creates a small model of the solar system as test data. """
+    
+    @classmethod
+    def create_sol(cls, context=None):
+        """
+            Create Sol, its planets, a few moons and four spaceports through the factories.
+
+            The created objects are stored on context as sun, planets, moons, celestials and spaceports.
+            context can be any object that accepts attribute assignment; without one a fresh namespace is created.
+            The context is returned.
+        """
+        if context is None:
+            # a bare object() rejects attribute assignment, a SimpleNamespace accepts it.
+            from types import SimpleNamespace
+            context = SimpleNamespace()
+        
+        celestials = [CelestialFactory(name="Sol", celestial_type=1, size=100)]
+        context.sun = sun = celestials[0]
+        
+        context.planets = planets = [
+                    CelestialFactory(name='Mercur', celestial_type=2, orbits=sun, size=2.4), #0
+                    CelestialFactory(name='Venus',  celestial_type=2, orbits=sun, size=6),
+                    CelestialFactory(name='Terra',  celestial_type=2, orbits=sun, size=6.4), #2
+                    CelestialFactory(name='Mars',   celestial_type=2, orbits=sun, size=3.4),
+                    CelestialFactory(name='Jupiter',celestial_type=2, orbits=sun, size=69.9), #4
+                    CelestialFactory(name='Saturn', celestial_type=2, orbits=sun, size=58.2),
+                    CelestialFactory(name='Uranus', celestial_type=2, orbits=sun, size=25.4), #6
+                    CelestialFactory(name='Neptune',celestial_type=2, orbits=sun, size=24.6),
+                    CelestialFactory(name='Pluto',celestial_type=3,   orbits=sun, size=1.1), #8
+                   ]
+        celestials.extend(planets)
+        
+        # the trailing numbers are the list indexes used for the orbits below and for the spaceports.
+        context.moons = moons = [
+                   CelestialFactory(name='Luna', celestial_type=3, orbits=planets[2], size=1.7), #0
+                   CelestialFactory(name='Phobos', celestial_type=4, orbits=planets[3], size=0.006),
+                   CelestialFactory(name='Deimos', celestial_type=4, orbits=planets[3], size=0.011), #2
+                   CelestialFactory(name='Io', celestial_type=3, orbits=planets[4], size=1.8),
+                   CelestialFactory(name='Europa', celestial_type=3, orbits=planets[4], size=1.5), #4
+                   CelestialFactory(name='Ganymede', celestial_type=3, orbits=planets[4], size=2.6),
+                   CelestialFactory(name='Callisto', celestial_type=3, orbits=planets[4], size=2.4), #6
+                   #...
+                   CelestialFactory(name='Charon', celestial_type=4, orbits=planets[8], size=0.6)
+                ]
+        celestials.extend(moons)
+        context.celestials = celestials
+        
+        # create space ports
+        context.spaceports = [
+            SpaceportFactory(celestial=planets[2], name="Houston IPS", ),
+            SpaceportFactory(celestial=moons[0], name='Copernicus'),
+            SpaceportFactory(celestial=planets[3], name='Utopia Planitia'),
+            SpaceportFactory(celestial=moons[2], name='Ares Station'),
+        ]
+        # hand the context back so a caller relying on the default can reach the created objects.
+        return context
--- a/blackmesa/records/tests/celestials/models.py
+++ b/blackmesa/records/tests/celestials/models.py
@@ -0,0 +1,50 @@
+from django.db import models
+from ...records import RecordManager
+
+class Celestial(models.Model):
+    """ A celestial body of one of the CELESTIAL_TYPES, optionally orbiting another celestial. """
+    CELESTIAL_TYPES = ((0, 'Unknown'),
+                       (1, 'Star'),
+                       (2, 'Planet'),
+                       (3, 'Planetoid'),
+                       (4, 'Asteroid'),
+                       (5, 'Station'))
+    # the body this one orbits; reachable from the other side as "orbitals".
+    orbits = models.ForeignKey('self', blank=True, null=True, related_name='orbitals', on_delete=models.CASCADE)
+    name = models.CharField(max_length=100)
+    # the callables int and float serve as defaults, i.e. 0 and 0.0.
+    celestial_type = models.IntegerField(choices=CELESTIAL_TYPES, default=int)
+    weight = models.FloatField(default=float)
+    size = models.FloatField(default=float)
+    
+    objects = RecordManager()
+    
+    @property
+    def is_moon(self):
+        """ True when this body and the body it orbits both have a celestial_type of 2, 3 or 4. """
+        # bool() keeps the result a real boolean; the bare and-chain hands back None for a body without orbit.
+        return bool(5 > self.celestial_type > 1 and self.orbits and 5 > self.orbits.celestial_type > 1)
+
+# A named spaceport located on one celestial (reverse accessor: celestial.spaceports).
+class Spaceport(models.Model):
+    name = models.CharField(max_length=100)
+    celestial = models.ForeignKey(Celestial, related_name='spaceports', on_delete=models.CASCADE)
+    
+    objects = RecordManager()
+
+# A person with an optional origin celestial (reverse accessor: celestial.children) and an optional age.
+class Person(models.Model):
+    origin = models.ForeignKey(Celestial, related_name='children', blank=True, null=True, on_delete=models.CASCADE)
+    first_name = models.CharField(max_length=100)
+    last_name = models.CharField(max_length=100)
+    age = models.IntegerField(blank=True, null=True)
+    
+    objects = RecordManager()
+
+# Links a person to a spaceport (reverse accessors: person.as_visitor, spaceport.visitors) with an optional luggage weight.
+class Visitor(models.Model):
+    person = models.ForeignKey(Person, related_name='as_visitor', on_delete=models.CASCADE)
+    spaceport = models.ForeignKey(Spaceport, related_name='visitors', on_delete=models.CASCADE)
+    # the callable float serves as default, i.e. 0.0.
+    luggage_weight = models.FloatField(blank=True, null=True, default=float)
+    
+    objects = RecordManager()
+
+# Links a person to a celestial named "planet" (reverse accessors: celestial.citizens, person.citizenships)
+# with an optional clearance level.
+class Citizen(models.Model):
+    planet = models.ForeignKey(Celestial, related_name='citizens', on_delete=models.CASCADE)
+    person = models.ForeignKey(Person, related_name='citizenships', on_delete=models.CASCADE)
+    clearance_level = models.IntegerField(blank=True, null=True)
+
+    objects = RecordManager()
+
--- a/blackmesa/records/tests/celestials/readme.md
+++ b/blackmesa/records/tests/celestials/readme.md
@@ -0,0 +1,7 @@
+# Celestials Test Database
+
+This is a royalty-free test case, provided without any guarantees.
+
+The `galaxy.Stars` helper class can create a solar system as test data.
+
+Part of StarGenerator
--- a/blackmesa/records/tests/celestials/requirements.in
+++ b/blackmesa/records/tests/celestials/requirements.in
@@ -0,0 +1,2 @@
+Django < 3
+# the "factory" module imported by factories.py is distributed as factory_boy.
+factory_boy
--- a/blackmesa/records/tests/test_record_queries.py
+++ b/blackmesa/records/tests/test_record_queries.py
@@ -0,0 +1,126 @@
+from dataclasses import dataclass
+from unittest.case import skipIf
+
+from django.db.models import F
+from django.test.testcases import TestCase
+import mock
+
+from ..records import Lambda, Adjunct, Callback
+from django.test.utils import tag
+from click.types import INT
+
+
+try:
+    from .celestials.models import Celestial
+    from .celestials.galaxy import Stars
+    celestials_installed = True
+except RuntimeError:
+    celestials_installed = False
+
+
+# Minimal record type: only the primary key.
+@dataclass
+class Entity:
+    id: int
+
+# Record type for test_lambda: orbits_name is filled from an F() expression and is_moon from a Lambda.
+@dataclass
+class SpaceRock:
+    id: int
+    name: str
+    orbits_name: str
+    is_moon: bool
+
+# Database-backed tests for records(); skipped when the celestials models could not be imported.
+@tag('library')
+@skipIf(not celestials_installed, "Celestials Testpackage not installed into INSTALLED_APPS.")
+class TestQueryBuilder(TestCase):
+
+    def setUp(self):
+        super().setUp()
+        # the test case itself is the context: create_sol() stores sun, planets, moons, celestials and spaceports on it.
+        Stars.create_sol(context=self)
+
+    def test_records(self):
+        # everything orbiting Sol with a type of at most 4, i.e. the entries of self.planets.
+        entities = Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4).records(Entity)
+        self.assertEqual(len(entities), len(self.planets))
+
+        # test whether we really return dataclass as result, even with first.
+        self.assertIsInstance(entities.first(), Entity)
+
+        # find moons. test whether i can use entities to do an SQL query. works because i have only one key.
+        self.assertEqual(len(self.moons), Celestial.objects.filter(orbits__in=entities).count())
+        
+        # this is pretty much the same as
+        self.assertEqual(len(self.moons), len(Celestial.objects.filter(
+            orbits__in=Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4)).values_list('id', flat=True)))
+
+    def test_lambda(self):
+
+        # this tests whether our own celestial type or the celestial type of what we orbit is correct for being a moon. parameter is a dictionary.
+        is_moon = lambda entry: True if 5 > (entry.get('celestial_type') or 0) > 1 and 5 > (entry.get('orbits_type') or 0) > 1 else False
+
+        entities = Celestial.objects.records(SpaceRock,  # we want our output to be a SpaceRock dataclass.
+                                    'celestial_type',  #  we include the key celestial_type into our query.
+                                    id=Adjunct(None),  # we blank out id to test Adjunct working.
+                                    orbits_name=F('orbits__name'),  # we set our custom orbits_name to a related field value
+                                    orbits_type=F('orbits__celestial_type'),  # our lambda needs this data.
+                                    is_moon=Lambda(is_moon))  # lambda over result
+
+        self.assertEqual(len(entities), len(self.celestials))
+
+        # NOTE(review): the queryset has no order_by; this loop assumes rows come back in the order of self.celestials.
+        for idx, entity in enumerate(entities):
+            dbdata = self.celestials[idx]
+            model = Celestial.objects.filter(id=dbdata.id).first()
+            self.assertEqual(entity.name, dbdata.name)
+            self.assertIsNone(entity.id)
+            self.assertEqual(entity.is_moon, model.is_moon)
+
+    def test_callbacks(self):
+        # identity callbacks wrapped in mocks, so only the number of calls is observed.
+        side_effect = lambda x:x
+        callback_one = mock.Mock(side_effect=side_effect)
+        callback_two = mock.Mock(side_effect=side_effect)
+
+        entities = Celestial.objects.all().records(Entity, callback_one=Callback(callback_one), callback_two=Callback(callback_two))
+
+        # evaluating the queryset with len() has to call each callback once per row.
+        self.assertEqual(len(entities), len(self.celestials))
+        self.assertEqual(callback_one.call_count, len(self.celestials))
+        self.assertEqual(callback_two.call_count, len(self.celestials))
+    
+    def test_double_value_technique(self):
+        """
+            Records open a new sort of technique for late calling details from dataclasses.
+            imagine you have a dataclass called EntityIndex, which has only one field from the database: id.
+            
+            however it has custom fields, where you store a lambda expression.
+            
+            you could e.g. use it in a subquery, while still access the data.
+            however this will hit the database twice if you evaluate the iterator yourself, as the lambda is not lazy.
+        """
+        
+        planet_queryset = Celestial.objects.filter(orbits__name='Sol', celestial_type__lte=4)
+        
+        @dataclass
+        class DetailedEntity:
+            id: int
+            name: str
+        
+        @dataclass
+        class IndexEntity:
+            id: int
+            detail: DetailedEntity
+        
+        # one extra query per row: loads the detail record for the id found in the row dictionary.
+        def get_details_exec(data):
+            return Celestial.objects.filter(pk=data.get('id')).records(DetailedEntity).first()
+        
+        get_details = mock.Mock(side_effect=get_details_exec)
+        
+        # retrieves data per key only.
+        my_planets = planet_queryset.records(IndexEntity, detail=Lambda(get_details))
+        my_moons = Celestial.objects.filter(orbits__in=my_planets).records(IndexEntity, detail=Lambda(get_details)) # legal
+        # django does not consume the iterator internally for subqueries:
+        self.assertEqual(get_details.call_count, 0)
+        # consume it ourselves...
+        for planet in my_planets:
+            self.assertIsNotNone(planet.detail.name)
+        self.assertEqual(get_details.call_count, len(self.planets))
+        # but...
+        self.assertEqual(len(my_moons), len(self.moons))
+        self.assertEqual(get_details.call_count, len(self.planets) + len(self.moons))
+