Skip to content

Commit

Permalink
Merge pull request #4 from M1hacka/cardinality-sum-function
Browse files Browse the repository at this point in the history
Added CardinalitySum function
  • Loading branch information
M1ha-Shvn authored Oct 8, 2019
2 parents 05dabf9 + 2fe7192 commit 4e04ae2
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 12 deletions.
18 changes: 13 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,21 @@ MyModel.objects.filter(hll__cardinality=3).count()
```

### Aggregate functions
In order to count aggregations and annotations, library provides 3 aggregate functions:
In order to compute aggregations and annotations, the library provides 4 aggregate functions:
* `django_pg_hll.aggregate.Cardinality`
Counts cardinality of hll field
* `django_pg_hll.aggregate.UnionAgg`
Aggregates multiple hll fields to one hll.
* `django_pg_hll.aggregate.UnionAggCardinality`
Counts the cardinality of the hll produced by combining rows with the UnionAgg function. In effect, it does `Cardinality(UnionAgg(hll))`.
P.S. Django doesn't give the ability to use a function inside a function.
* `django_pg_hll.aggregate.CardinalitySum`
Counts the sum of the hll cardinalities of multiple rows. In effect, it does `Sum(Cardinality(hll))`.
P.S. Django doesn't give the ability to use a function inside a function.
```python
from django.db import models
from django_pg_hll import HllField, HllInteger
from django_pg_hll.aggregate import Cardinality, UnionAggCardinality
from django_pg_hll.aggregate import Cardinality, UnionAggCardinality, CardinalitySum


class ForeignModel(models.Model):
Expand All @@ -130,16 +133,21 @@ class MyModel(models.Model):

MyModel.objects.bulk_create([
MyModel(fk=1, hll=HllInteger(1)),
MyModel(fk=2, hll=HllInteger(2) | HllInteger(3)),
MyModel(fk=2, hll=HllInteger(2) | HllInteger(3) | HllInteger(4)),
MyModel(fk=3, hll=HllInteger(4))
])

MyModel.objects.annotate(card=Cardinality('hll_field')).values_list('id', 'card')
# outputs (1, 1), (2, 2), (3, 1)
# outputs (1, 1), (2, 3), (3, 1)

# Count cardinality for hll, built from
# Count cardinality of the hll built by the union of all rows
# Element 4 exists in the rows with fk=2 and fk=3. After the union it is counted only once
ForeignModel.objects.annotate(card=UnionAggCardinality('testmodel__hll_field')).values_list('card', flat=True)
# outputs [4]

# Count sum of cardinalities for each row
ForeignModel.objects.annotate(card=CardinalitySum('testmodel__hll_field')).values_list('card', flat=True)
# outputs [5]
```

### [django-pg-bulk-update](https://github.com/M1hacka/django-pg-bulk-update) integration
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

setup(
name='django-pg-hll',
version='1.1.0',
version='1.2.0',
packages=['django_pg_hll'],
package_dir={'': 'src'},
url='https://github.com/M1hacka/django-pg-hll',
Expand Down
12 changes: 11 additions & 1 deletion src/django_pg_hll/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,21 @@ class UnionAgg(Aggregate):
output_field = HllField()


class UnionAggCardinality(UnionAgg):
class UnionAggCardinality(Aggregate):
    """
    Aggregate that returns the cardinality of a union of hll values in one call.

    The SQL template wraps the aggregate function in ``hll_cardinality``, i.e.
    ``hll_cardinality(hll_union_agg(<expressions>))``. It exists as a dedicated
    class because the author found no way to combine a function inside a
    function in django.
    """
    # NOTE(review): hll_cardinality is documented by postgresql-hll as returning
    # double precision - confirm IntegerField is the intended output type.
    output_field = IntegerField()
    template = 'hll_cardinality(%(function)s(%(expressions)s))'
    function = 'hll_union_agg'


class CardinalitySum(Aggregate):
    """
    Aggregate that returns the sum of hll cardinalities in one call.

    The SQL template wraps the function in ``SUM``, i.e.
    ``SUM(hll_cardinality(<expressions>))``. It exists as a dedicated class
    because the author found no way to combine a function inside a function
    in django.
    """
    # NOTE(review): hll_cardinality is documented by postgresql-hll as returning
    # double precision - confirm IntegerField is the intended output type.
    output_field = IntegerField()
    template = 'SUM(%(function)s(%(expressions)s))'
    function = 'hll_cardinality'
20 changes: 15 additions & 5 deletions tests/test_hll_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from django.test import TestCase

from django_pg_hll import HllEmpty, HllInteger
from django_pg_hll.aggregate import Cardinality, UnionAgg, UnionAggCardinality
from django_pg_hll.aggregate import Cardinality, UnionAgg, UnionAggCardinality, CardinalitySum

# !!! Don't remove this import, or bulk_update will not see function name
from django_pg_hll.bulk_update import HllConcatFunction
Expand Down Expand Up @@ -92,17 +92,18 @@ class TestAggregation(TestCase):
def setUp(self):
TestModel.objects.bulk_create([
TestModel(id=100501, hll_field=HllEmpty()),
TestModel(id=100502, hll_field=HllInteger(1)),
TestModel(id=100502, hll_field=HllInteger(1) | HllInteger(2)),
TestModel(id=100503, hll_field=HllInteger(2))
])

def test_cardinality_transform_filter(self):
self.assertEqual(1, TestModel.objects.filter(hll_field__cardinality=0).count())
self.assertEqual(2, TestModel.objects.filter(hll_field__cardinality=1).count())
self.assertEqual(0, TestModel.objects.filter(hll_field__cardinality=2).count())
self.assertEqual(1, TestModel.objects.filter(hll_field__cardinality=1).count())
self.assertEqual(1, TestModel.objects.filter(hll_field__cardinality=2).count())
self.assertEqual(0, TestModel.objects.filter(hll_field__cardinality=3).count())

def test_cardinality_aggregate_function(self):
self.assertEqual({0, 1}, set(TestModel.objects.annotate(card=Cardinality('hll_field')).
self.assertEqual({0, 1, 2}, set(TestModel.objects.annotate(card=Cardinality('hll_field')).
values_list('card', flat=True)))

def test_union_aggregate_function(self):
Expand All @@ -126,6 +127,15 @@ def test_union_aggregate_cardinality_function(self):

self.assertEqual(2, card)

def test_cardinality_sum_function(self):
    # Attach every existing TestModel row to a single parent so that the
    # aggregate over the reverse relation covers all of them.
    parent = FKModel.objects.create()
    TestModel.objects.all().update(fk=parent)

    # Annotate the parent with the summed cardinality of its related rows
    annotated = FKModel.objects.annotate(card=CardinalitySum('testmodel__hll_field'))
    cardinalities = annotated.values_list('card', flat=True)

    self.assertEqual(3, cardinalities[0])



@skipIf(not django_pg_bulk_update_available(), 'django-pg-bulk-update library is not installed')
class TestBulkUpdate(TestCase):
Expand Down

0 comments on commit 4e04ae2

Please sign in to comment.