Skip to content

Commit 3c88218

Browse files
committed
Improve fetching shelflist row numbers from Redis
After optimizing the 'utils.redisobjs' Redis interface, I'm going through the codebase and, where possible, making changes to take advantage of the optimizations and new features. This commit makes two specific improvements to how the shelflistItem resource's 'rowNumber' field is fetched from Redis. 1. Reduces the number of requests made to Redis to fetch row numbers for a page of results (in a list view). Before, we submitted one individual request per item. (A page with 500 items meant 500 separate requests, just for row numbers.) But now, since the 'utils.redisobjs.RedisObject' allows you to look up the index position for multiple values with one request, we can look up a page's worth of row numbers with one request and one Redis command. 2. We briefly cache the results of the "page" row-number request (using the SerializerWithLookups features) such that a request for a detail view of one item tries to use the cache first before sending another request to Redis. Hitting the page / list view again always refreshes the cached row numbers, which should prevent that cache from going stale. I've also added tests that specifically test the new row number fetching, caching, and Redis interaction to make sure it all works as intended.
1 parent b98f8d3 commit 3c88218

3 files changed

Lines changed: 220 additions & 17 deletions

File tree

django/sierra/api/simpleserializers.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -513,13 +513,21 @@ def get_db_objects(self, model, key_field, match_field, prefetch):
513513
def cache_lookup(self, fname, values):
514514
self._lookup_cache[fname] = values
515515

516-
def get_lookup_value(self, fname, lookup_code):
516+
def get_lookup_value(self, fname, lookup_code, refresh=True):
517517
"""
518-
Child classes can implement `refresh_{lookup}` methods for
519-
refreshing the cache of each lookup from the source.
518+
Returns a cached lookup value by field and code.
519+
520+
When there's a cache miss, it will by default attempt to
521+
refresh the lookup and try again. You can disable this by
522+
sending `False` for the `refresh` kwarg.
523+
524+
When `refresh` is True, it tries to find a `refresh_{fname}`
525+
method, where "fname" is the name of the lookup field. If not
526+
found, then it calls `cache_all_lookups`. Subclasses should
527+
implement these methods as necessary.
520528
"""
521529
val = self._lookup_cache.get(fname, {}).get(lookup_code)
522-
if val is None:
530+
if val is None and refresh:
523531
if lookup_code is None:
524532
return None
525533
getattr(self, f'refresh_{fname}', self.cache_all_lookups)()

django/sierra/shelflist/serializers.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,23 @@ def present(self, obj_data):
5555

5656
class RowNumberField(api_serializers.SimpleIntField):
5757
def present(self, obj_data):
58-
r = RedisObject('shelflistitem_manifest',
59-
obj_data.get('location_code'))
60-
return r.get_index(obj_data.get('id'))
58+
# Note: We don't have to cast the result to int because
59+
# it's coming from the index position in a Redis zset which
60+
# already gets returned as an int.
61+
row_num = obj_data['row_number']
62+
if row_num is None:
63+
location_code = obj_data.get('location_code')
64+
r = RedisObject('shelflistitem_manifest', location_code)
65+
return r.get_index(obj_data['id'])
66+
return row_num
6167

6268
obj_interface = SimpleObjectInterface(solr.Result)
6369
fields = [
6470
LinksField('_links', derived=True),
6571
api_serializers.SimpleStrField('id', filterable=True),
6672
api_serializers.SimpleStrField('record_number', source='id',
6773
filterable=True),
68-
RowNumberField('row_number', derived=True),
74+
RowNumberField('row_number', derived=True),
6975
api_serializers.CallNumberField('call_number', filterable=True,
7076
filter_source='call_number_search'),
7177
api_serializers.SimpleStrField('call_number_type', filterable=True),
@@ -90,8 +96,20 @@ def present(self, obj_data):
9096
filterable=True),
9197
]
9298

99+
def __init__(self, *args, **kwargs):
100+
self.item_ids = []
101+
super().__init__(*args, **kwargs)
102+
93103
def cache_all_lookups(self):
94104
self.refresh_status()
105+
self.refresh_row_numbers()
106+
107+
def to_representation(self, obj):
108+
if self.obj_interface.obj_is_many(obj):
109+
self.item_ids = [item['id'] for item in obj]
110+
# The superclass ends up running self.cache_all if this is a
111+
# page view (self.obj_interface.obj_is_many(obj) == True)
112+
return super().to_representation(obj)
95113

96114
def refresh_status(self):
97115
qs = solr.Queryset(
@@ -109,13 +127,26 @@ def refresh_status(self):
109127
pass
110128
self.cache_lookup('status', lookup)
111129

130+
def refresh_row_numbers(self):
131+
if self.item_ids:
132+
row_numbers = self._lookup_cache.get('row_numbers', {})
133+
location_code = self.context['view'].kwargs['code']
134+
r = RedisObject('shelflistitem_manifest', location_code)
135+
fetched = r.get_index(*self.item_ids)
136+
if fetched is not None:
137+
row_numbers.update(dict(zip(self.item_ids, fetched)))
138+
self.cache_lookup('row_numbers', row_numbers)
139+
112140
def prepare_for_serialization(self, obj_data):
113141
obj_data['__context'] = self.context
114142
st_code = obj_data.get('status_code')
115143
if st_code == '-' and obj_data.get('due_date') is not None:
116144
obj_data['status'] = 'CHECKED OUT'
117145
else:
118146
obj_data['status'] = self.get_lookup_value('status', st_code)
147+
obj_data['row_number'] = self.get_lookup_value(
148+
'row_numbers', obj_data['id'], refresh=False
149+
)
119150
return obj_data
120151

121152
def save(self, *args, **kwargs):

django/sierra/shelflist/tests/test_api.py

Lines changed: 173 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
import ujson
1313
from shelflist.exporters import ItemsToSolr
1414
from shelflist.search_indexes import ShelflistItemIndex
15+
from shelflist.serializers import ShelflistItemSerializer
1516
from six import text_type
1617
from six.moves import range
18+
from utils.redisobjs import RedisObject
1719

1820

1921
# FIXTURES AND TEST DATA
@@ -643,7 +645,7 @@ def test_shelflistitem_view_orderby(order_by, api_settings, shelflist_solr_env,
643645

644646
def test_shelflistitem_row_order(api_settings, shelflist_solr_env,
645647
get_shelflist_urls, api_client, redis_obj,
646-
get_found_ids, settings):
648+
get_found_ids):
647649
"""
648650
The `shelflistitems` list view should list items in the same order
649651
that the shelflist manifest for that location lists them. The
@@ -652,12 +654,11 @@ def test_shelflistitem_row_order(api_settings, shelflist_solr_env,
652654
recs = shelflist_solr_env.records['shelflistitem']
653655
loc = recs[0]['location_code']
654656
loc_recs = [r for r in recs if r['location_code'] == loc]
655-
using = settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
657+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
656658
index = ShelflistItemIndex(using=using)
657659
manifest = index.get_location_manifest(loc)
658660
redis_key = '{}:{}'.format(REDIS_SHELFLIST_PREFIX, loc)
659661
redis_obj(redis_key).set(manifest)
660-
661662
url = get_shelflist_urls(shelflist_solr_env.records['shelflistitem'])[loc]
662663
response = api_client.get(url)
663664
total = response.data['totalCount']
@@ -667,6 +668,169 @@ def test_shelflistitem_row_order(api_settings, shelflist_solr_env,
667668
assert row_numbers == [num for num in range(0, total)]
668669

669670

671+
def test_shelflistitem_row_pagination(api_settings, shelflist_solr_env,
672+
get_shelflist_urls, api_client,
673+
redis_obj, get_found_ids):
674+
"""
675+
When paginating a `shelflistitems` list view, the `rowNumber`
676+
values should accurately reflect the requested pagination
677+
parameters.
678+
"""
679+
recs = shelflist_solr_env.records['shelflistitem']
680+
loc = recs[0]['location_code']
681+
loc_recs = [r for r in recs if r['location_code'] == loc]
682+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
683+
index = ShelflistItemIndex(using=using)
684+
manifest = index.get_location_manifest(loc)
685+
redis_key = '{}:{}'.format(REDIS_SHELFLIST_PREFIX, loc)
686+
redis_obj(redis_key).set(manifest)
687+
total = len(loc_recs)
688+
offset = int(total / 2)
689+
limit = int(total / 4)
690+
limit_p = api_settings.REST_FRAMEWORK['PAGINATE_BY_PARAM']
691+
offset_p = api_settings.REST_FRAMEWORK['PAGINATE_PARAM']
692+
url = get_shelflist_urls(shelflist_solr_env.records['shelflistitem'])[loc]
693+
paginated_url = f"{url}?{limit_p}={limit}&{offset_p}={offset}"
694+
response = api_client.get(paginated_url)
695+
found_ids = get_found_ids('id', response)
696+
row_numbers = get_found_ids('rowNumber', response)
697+
assert found_ids[0] == manifest[offset]
698+
assert row_numbers[0] == offset
699+
assert found_ids[-1] == manifest[offset + limit - 1]
700+
assert row_numbers[-1] == offset + limit - 1
701+
702+
703+
def test_shelflistitem_row_filtering(api_settings, shelflist_solr_env,
704+
get_shelflist_urls, api_client,
705+
assemble_shelflist_test_records,
706+
redis_obj, get_found_ids):
707+
"""
708+
When filtering a `shelflistitems` list view, the `rowNumber`
709+
values should accurately reflect the items included in the filter.
710+
"""
711+
recs = shelflist_solr_env.records['shelflistitem']
712+
loc = recs[0]['location_code']
713+
loc_recs = [r for r in recs if r['location_code'] == loc]
714+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
715+
716+
exp_rows = [2, 4, 10, 13, 14, 17]
717+
test_data = [
718+
{
719+
'id': f"TEST{i}",
720+
'call_number_type': '__A',
721+
'call_number': f"___A{i}",
722+
'call_number_sort': f"___A{i:04}",
723+
'call_number_search': f"___A{i}",
724+
'shelf_status': 'SELECT_ME' if i in exp_rows else 'SKIP',
725+
'location_code': loc
726+
} for i in range(20)
727+
]
728+
exp_ids = [f"TEST{i}" for i in exp_rows]
729+
_, trecs = assemble_shelflist_test_records(
730+
[(item['id'], item) for item in test_data]
731+
)
732+
733+
index = ShelflistItemIndex(using=using)
734+
manifest = index.get_location_manifest(loc)
735+
redis_key = '{}:{}'.format(REDIS_SHELFLIST_PREFIX, loc)
736+
redis_obj(redis_key).set(manifest)
737+
738+
url = get_shelflist_urls(shelflist_solr_env.records['shelflistitem'])[loc]
739+
filtered_url = f"{url}?shelfStatus=SELECT_ME"
740+
response = api_client.get(filtered_url)
741+
found_ids = get_found_ids('id', response)
742+
row_numbers = get_found_ids('rowNumber', response)
743+
assert found_ids == exp_ids
744+
assert row_numbers == exp_rows
745+
746+
747+
def test_shelflistitem_detail_view_rows(api_settings, shelflist_solr_env,
748+
get_shelflist_urls, api_client,
749+
redis_obj):
750+
"""
751+
The `shelflistitems` detail view for individual items should
752+
reflect the correct `rowNumber` values.
753+
"""
754+
recs = shelflist_solr_env.records['shelflistitem']
755+
loc = recs[0]['location_code']
756+
loc_recs = [r for r in recs if r['location_code'] == loc]
757+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
758+
index = ShelflistItemIndex(using=using)
759+
manifest = index.get_location_manifest(loc)
760+
redis_key = '{}:{}'.format(REDIS_SHELFLIST_PREFIX, loc)
761+
redis_obj(redis_key).set(manifest)
762+
total = len(loc_recs)
763+
url = get_shelflist_urls(shelflist_solr_env.records['shelflistitem'])[loc]
764+
rows_to_check = (0, 2, len(manifest) - 1)
765+
for exp_row in rows_to_check:
766+
item_id = manifest[exp_row]
767+
response = api_client.get(f"{url}{item_id}")
768+
assert response.data['rowNumber'] == exp_row
769+
770+
771+
def test_shelflistitem_list_row_caching(api_settings, shelflist_solr_env,
772+
get_shelflist_urls, api_client,
773+
mocker):
774+
"""
775+
For `shelflistitem` views, the `rowNumber` value comes from Redis.
776+
The serializer is configured to minimize Redis lookups (to speed up
777+
requests). On a list view, it gets the first item from Redis but
778+
calculates the rest of the values, and it caches them. The cache is
779+
refreshed on every list view request. Detail view requests try to
780+
use the cache -- but they look the value up in Redis if there's a
781+
cache miss.
782+
"""
783+
mocker.patch.object(
784+
RedisObject, 'get_index', mocker.Mock(
785+
side_effect=lambda *args: list(range(1000, 1000 + len(args)))
786+
)
787+
)
788+
ShelflistItemSerializer._lookup_cache['row_numbers'] = {}
789+
recs = shelflist_solr_env.records['shelflistitem']
790+
loc = recs[0]['location_code']
791+
loc_recs = [r for r in recs if r['location_code'] == loc]
792+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
793+
index = ShelflistItemIndex(using=using)
794+
manifest = index.get_location_manifest(loc)
795+
url = get_shelflist_urls(shelflist_solr_env.records['shelflistitem'])[loc]
796+
response = api_client.get(url)
797+
call_stack = []
798+
799+
# The serializer lookup_cache should contain row numbers for all
800+
# items we've requested. Note it starts at 1000 because this is
801+
# what the mock we created above returns.
802+
assert ShelflistItemSerializer._lookup_cache['row_numbers'] == {
803+
item_id: 1000 + i for i, item_id in enumerate(manifest)
804+
}
805+
806+
# Assuming we have multiple items in our list view, we only query
807+
# Redis to get the first item, and we extrapolate to get the rest
808+
# of the items in the list.
809+
assert len(manifest) > 1
810+
call_stack.append(mocker.call(*manifest))
811+
assert RedisObject.get_index.mock_calls == call_stack
812+
813+
# Now when we request detail views for individual rows in the list,
814+
# it should still use the cached row number, if it finds it,
815+
# instead of querying Redis.
816+
for ping_row in (0, 2, len(manifest) - 1):
817+
api_client.get(f"{url}{manifest[ping_row]}")
818+
assert RedisObject.get_index.mock_calls == call_stack
819+
820+
# When we hit the list view again it should refresh the cache,
821+
# resulting in an additional call to Redis.
822+
api_client.get(url)
823+
call_stack.append(mocker.call(*manifest))
824+
assert RedisObject.get_index.mock_calls == call_stack
825+
826+
# If we clear the serializer lookup_cache, then hitting the detail
827+
# view for an individual row requests the rowNumber from Redis.
828+
ShelflistItemSerializer._lookup_cache['row_numbers'] = {}
829+
api_client.get(f"{url}{manifest[-1]}")
830+
call_stack.append(mocker.call(manifest[-1]))
831+
assert RedisObject.get_index.mock_calls == call_stack
832+
833+
670834
def test_shelflistitem_putpatch_requires_auth(api_settings,
671835
assemble_custom_shelflist,
672836
get_shelflist_urls, api_client):
@@ -731,15 +895,15 @@ def test_shelflistitem_update_items(method, api_settings,
731895
shelflist_solr_env,
732896
filter_serializer_fields_by_opt,
733897
derive_updated_resource, send_api_data,
734-
get_shelflist_urls, api_client, settings):
898+
get_shelflist_urls, api_client):
735899
"""
736900
Updating writeable fields on shelflistitems should update/save the
737901
resource: it should update the writeable fields that were changed
738902
and keep all other fields exactly the same.
739903
"""
740904
test_lcode, test_id = '1test', 'i99999999'
741905
_, _, trecs = assemble_custom_shelflist(test_lcode, [(test_id, {})])
742-
using = settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
906+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
743907
index = ShelflistItemIndex(using=using)
744908
manifest = index.get_location_manifest(test_lcode)
745909
redis_key = '{}:{}'.format(REDIS_SHELFLIST_PREFIX, test_lcode)
@@ -785,14 +949,14 @@ def test_shelflistitem_delete_data(method, api_settings,
785949
shelflist_solr_env,
786950
filter_serializer_fields_by_opt,
787951
derive_updated_resource, send_api_data,
788-
get_shelflist_urls, api_client, settings):
952+
get_shelflist_urls, api_client):
789953
"""
790954
Updating a writeable field and providing a null value (None) should
791955
delete the stored value.
792956
"""
793957
test_lcode, test_id = '1test', 'i99999999'
794958
_, _, trecs = assemble_custom_shelflist(test_lcode, [(test_id, {})])
795-
using = settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
959+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
796960
index = ShelflistItemIndex(using=using)
797961
manifest = index.get_location_manifest(test_lcode)
798962
redis_key = '{}:{}'.format(REDIS_SHELFLIST_PREFIX, test_lcode)
@@ -888,7 +1052,7 @@ def test_shelflist_firstitemperlocation_list(test_data, search, expected,
8881052
api_settings, redis_obj,
8891053
assemble_custom_shelflist,
8901054
api_client, get_found_ids,
891-
do_filter_search, settings):
1055+
do_filter_search):
8921056
"""
8931057
The `firstitemperlocation` resource is basically a custom filter
8941058
for `items` that submits a facet-query to Solr asking for the first
@@ -907,7 +1071,7 @@ def test_shelflist_firstitemperlocation_list(test_data, search, expected,
9071071
recs = test_data_by_location.get(lcode, []) + [(test_id, rec)]
9081072
test_data_by_location[lcode] = recs
9091073

910-
using = settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
1074+
using = api_settings.REST_VIEWS_HAYSTACK_CONNECTIONS['ShelflistItems']
9111075
index = ShelflistItemIndex(using=using)
9121076
for test_lcode, data in test_data_by_location.items():
9131077
assemble_custom_shelflist(test_lcode, data)

0 commit comments

Comments
 (0)