Skip to content

Commit 28abedd

Browse files
committed
Merge branch 'improve-tasks'
This set of changes optimizes exporters, particularly `export.tasks`, to greatly improve stability, performance, and reliability of export jobs. Testing in our staging environment shows a 4-5X increase in throughput with the new tasks. Note that these are a bit more memory-intensive than the previous tasks, so you may need to use the new EXPORTER_MAX_RC_CONFIG and EXPORTER_MAX_DC_CONFIG settings to scale how large job chunks are.
2 parents 70664bb + 1c8a45c commit 28abedd

12 files changed

Lines changed: 749 additions & 418 deletions

File tree

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,19 @@ circumstances. If the variable is not set, the default value is used.
899899
will match up with a `config.properties` file in
900900
`<project_root>/solr/solrmarc`. (See "SolrMarc Configuration," below,
901901
for more information.) Default is `dev_config.properties`.
902+
* `EXPORTER_MAX_RC_CONFIG` and `EXPORTER_MAX_DC_CONFIG` &mdash; These two
903+
settings allow you to set overrides for the `max_rec_chunk` and
904+
`max_del_chunk` attributes of `Exporter` objects. They are entirely
904+
optional; if no override is set for an exporter, the value defined on
905+
its class is used. However, depending on how your
907+
development, production, staging, and testing environments are set up,
908+
you may need (e.g.) your development settings scaled back compared to
909+
your staging and production settings. This lets you configure that on
910+
an env-specific basis. Note that in your .env file, each setting is a
910+
comma-separated list of `ExporterName:chunk_size` pairs, like this:
912+
913+
EXPORTER_MAX_RC_CONFIG="ItemsToSolr:1000,BibsToSolr:500"
914+
902915
* Production Settings &mdash; These are settings you'll probably only need to
903916
set in production. If your development environment is very different than
904917
the default setup, then you may need to set these there as well.

django/sierra/base/managers.py

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
'''
1+
"""
22
Custom Managers for sierra base app models.
3-
'''
3+
"""
44
from datetime import date, time, datetime
55

66
from django.db import models
@@ -10,7 +10,7 @@
1010

1111

1212
class CustomFilterManager(models.Manager):
13-
'''
13+
"""
1414
A generic models.Manager class that provides the ability to set
1515
custom filters easily. Just create a child class, and then create a
1616
method with the same name as the filter that you pass to filter_by.
@@ -20,13 +20,13 @@ class CustomFilterManager(models.Manager):
2020
set. For instance: [{a AND b AND c} OR {d} OR {e AND f}]. Order_by
2121
should be an array that can be passed as arguments to
2222
queryset.order_by().
23-
'''
23+
"""
2424
options = {}
2525

2626
def _apply_filter(self, filter_method):
27-
'''
27+
"""
2828
Applies the filter_method and returns the filtered queryset.
29-
'''
29+
"""
3030
filter_params = filter_method()
3131
filter = filter_params['filter']
3232
order_by = filter_params['order_by']
@@ -39,29 +39,31 @@ def _apply_filter(self, filter_method):
3939
set = set.order_by(*order_by)
4040
return set
4141

42-
def filter_by(self, filter_method, options={}):
43-
'''
42+
def filter_by(self, filter_method, options=None):
43+
"""
4444
Fetches a set of records based on a filter string and any
4545
options you specify. Options should be a dictionary.
46-
'''
47-
self.options = options
46+
"""
47+
self.options = options or {}
4848
filter_method = getattr(self, filter_method)
4949
return self._apply_filter(filter_method)
5050

5151

5252
class RecordManager(CustomFilterManager):
53-
'''
53+
"""
5454
Defines some common filters that apply across multiple types of
5555
records from the Sierra database, such as the base record types
5656
(item, bib, patron, etc.)
57-
'''
57+
"""
5858

5959
def updated_date_range(self):
60-
'''
60+
"""
6161
Filter by a date range for last_updated. Options should contain
62-
date_range_from and date_range_to, each of which are simply date
63-
objects.
64-
'''
62+
date_range_from and date_range_to, each of which are simply
63+
date objects. Options *may* contain `is_deletion`, which is a
64+
boolean that indicates whether or not this requires "last
65+
deleted" rather than "last updated".
66+
"""
6567
options = self.options
6668
date_from = datetime.combine(options['date_range_from'], time(0, 0))
6769
date_from = tz.make_aware(date_from, tz.get_default_timezone())
@@ -74,25 +76,25 @@ def updated_date_range(self):
7476
prefix = 'record_metadata__'
7577
else:
7678
prefix = ''
77-
filter = [
78-
{
79-
'{}record_last_updated_gmt__gte'.format(prefix): date_from,
80-
'{}record_last_updated_gmt__lte'.format(prefix): date_to
81-
},
82-
{
79+
if options.get('is_deletion', False):
80+
filter = [{
8381
'{}deletion_date_gmt__gte'.format(prefix): date_from,
8482
'{}deletion_date_gmt__lte'.format(prefix): date_to,
85-
}
86-
]
87-
88-
order_by = ['{}record_last_updated_gmt'.format(prefix)]
83+
}]
84+
order_by = ['{}deletion_date_gmt'.format(prefix)]
85+
else:
86+
filter = [{
87+
'{}record_last_updated_gmt__gte'.format(prefix): date_from,
88+
'{}record_last_updated_gmt__lte'.format(prefix): date_to
89+
}]
90+
order_by = ['{}record_last_updated_gmt'.format(prefix)]
8991
return {'filter': filter, 'order_by': order_by}
9092

9193
def record_range(self):
92-
'''
94+
"""
9395
Filter by a III record number range. Options should contain
9496
record_range_from and record_range_to.
95-
'''
97+
"""
9698
options = self.options
9799
record_from = options['record_range_from']
98100
record_to = options['record_range_to']
@@ -109,30 +111,31 @@ def record_range(self):
109111
return {'filter': filter, 'order_by': order_by}
110112

111113
def last_export(self):
112-
'''
114+
"""
113115
Filter by a latest updated datetime in options['latest_time'].
114-
'''
116+
"""
115117
options = self.options
116118
latest_time = options['latest_time']
117119
if self.model._meta.object_name != 'RecordMetadata':
118120
prefix = 'record_metadata__'
119121
else:
120122
prefix = ''
121-
filter = [
122-
{
123-
'{}record_last_updated_gmt__gte'.format(prefix): latest_time
124-
},
125-
{
123+
if options.get('is_deletion', False):
124+
filter = [{
126125
'{}deletion_date_gmt__gte'.format(prefix): latest_time
127-
}
128-
]
129-
order_by = ['{}record_last_updated_gmt'.format(prefix)]
126+
}]
127+
order_by = ['{}deletion_date_gmt'.format(prefix)]
128+
else:
129+
filter = [{
130+
'{}record_last_updated_gmt__gte'.format(prefix): latest_time
131+
}]
132+
order_by = ['{}record_last_updated_gmt'.format(prefix)]
130133
return {'filter': filter, 'order_by': order_by}
131134

132135
def full_export(self):
133-
'''
136+
"""
134137
No filter.
135-
'''
138+
"""
136139
if self.model._meta.object_name != 'RecordMetadata':
137140
prefix = 'record_metadata__'
138141
else:
@@ -142,9 +145,9 @@ def full_export(self):
142145
return {'filter': filter, 'order_by': order_by}
143146

144147
def location(self):
145-
'''
148+
"""
146149
Filters item records by location (code).
147-
'''
150+
"""
148151
options = self.options
149152
location_code = self.options['location_code']
150153
if self.model._meta.object_name == 'RecordMetadata':

django/sierra/export/admin.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66

77
from .models import ExportType, ExportFilter, ExportInstance, Status
88
from .forms.modelforms import ExportForm
9-
from .tasks import trigger_export
9+
from .tasks import export_dispatch
1010

1111
def process_export_form(request):
12-
'''
12+
"""
1313
Takes a request object and validates/parses/processes POSTed form
1414
data from an ExportForm. Returns the validated form object.
15-
'''
15+
"""
1616
form = ExportForm(request.POST)
1717
post = request.POST
1818
if form.is_valid():
@@ -21,7 +21,7 @@ def process_export_form(request):
2121
export_instance.user = request.user
2222
export_instance.filter_params = params
2323
export_instance.timestamp = tz.now()
24-
export_instance.status = Status.objects.get(pk='in_progress')
24+
export_instance.status = Status.objects.get(pk='waiting')
2525
export_instance.save()
2626
return form
2727

@@ -68,8 +68,8 @@ def add_view(self, request, form_url='', extra_content=None):
6868
export_type = data['export_type'].pk
6969
del data['export_filter']
7070
del data['export_type']
71-
trigger_export(form.instance, export_filter,
72-
export_type, data)
71+
export_dispatch(form.instance.pk, export_filter, export_type,
72+
data)
7373
reverse_url = 'admin:{}_{}_change'.format(
7474
self.model._meta.app_label,
7575
self.model._meta.module_name
@@ -91,4 +91,4 @@ class StatusAdmin(admin.ModelAdmin):
9191
admin.site.register(ExportType, ExportTypeAdmin)
9292
admin.site.register(ExportFilter, ExportFilterAdmin)
9393
admin.site.register(ExportInstance, ExportInstanceAdmin)
94-
admin.site.register(Status, StatusAdmin)
94+
admin.site.register(Status, StatusAdmin)

0 commit comments

Comments
 (0)