14142. Overrides the SolrSearchBackend.clear() method so that the Solr
1515index optimization isn't triggered if commit is false.
1616"""
17+ import subprocess
18+ import os
19+ import shlex
20+ import re
1721
1822from django .apps import apps
23+ from django .conf import settings
1924
2025from haystack .backends import solr_backend , BaseEngine
2126from haystack .models import SearchResult
@@ -155,4 +160,110 @@ def clear(self, models=[], commit=True):
155160
class CustomSolrEngine(BaseEngine):
    """
    Haystack engine that pairs the CustomSolrSearchBackend with the
    stock Solr search query class.
    """
    query = solr_backend.SolrSearchQuery
    backend = CustomSolrSearchBackend
164+
165+
class SolrmarcIndexBackend(CustomSolrSearchBackend):
    """
    This is a custom Solr backend class for Haystack(ish) indexes that
    implements doing index updates via Solrmarc. All of the code here
    is derived from the code that was part of the `BibsDownloadMarc`
    and `BibsToSolr` exporters (in `export.basic_exporters`). As we're
    working on additional indexes fed by Solrmarc (for Blacklight),
    it started to make more sense to move that into a lower-level
    class for more uniformity at the index and exporter levels.

    How to use this class? In Django settings, use the SolrmarcEngine
    class in your HAYSTACK_CONNECTIONS definition. Ensure that you've
    created the applicable Solr core and that you have an
    index.properties file in the solr/solrmarc project directory for
    that index. (By default you should name it <core>_index.properties,
    where <core> is the name of the Solr core.) Your haystack index
    class should be a `base.search_indexes.CustomQuerySetIndex` or
    `SolrmarcIndex` class. There are a few class attributes you can add
    to the index class to help further define how the SolrMarc process
    works--without them, sensible defaults are used.

    `s2marc_class` -- The S2MarcBatch (see `export.sierra2marc`) or
    equivalent/derived class that does the batch conversion of Sierra
    data (via the Django ORM models) to MARC records and saves them to
    the filesystem so that Solrmarc can index them. Default is
    S2MarcBatch.

    `index_properties` -- The filename for the index.properties file
    that converts the MARC files to Solr fields. As mentioned above,
    the default is '<core>_index.properties' -- where <core> is the
    name of the Solr core for that index.

    `config_file` -- The filename for the Solrmarc config.properties
    file that defines a bunch of settings used by Solrmarc. Default is
    the SOLRMARC_CONFIG_FILE Django setting.

    `temp_filedir` -- The filesystem directory where the temporary MARC
    file that gets loaded into Solrmarc is stored. Default is the
    MEDIA_ROOT Django setting.
    """

    class IndexError(Exception):
        """Wraps a plain error message recorded during an index update."""
        pass

    def log_error(self, index, obj_str, err):
        """
        Append an `(obj_str, err)` tuple to `index.last_batch_errors`.

        If `err` is not already an Exception instance, it is wrapped in
        this class's `IndexError` first.
        """
        err = err if isinstance(err, Exception) else self.IndexError(err)
        index.last_batch_errors.append((obj_str, err))

    def _records_to_marcfile(self, index, records):
        """
        Convert `records` to MARC via `index.s2marc_class`, write them
        to a file, and return the filename given back by the batch
        object's `to_file` method.

        Each entry in the batch's `errors` is logged on the index via
        `log_error`. An IOError raised while writing is re-raised as an
        IOError with a more descriptive message.
        """
        batch = index.s2marc_class(records)
        out_recs = batch.to_marc()
        try:
            filename = batch.to_file(out_recs, append=False)
        except IOError as e:
            raise IOError('Error writing to output file: {}'.format(e))
        # A distinct loop name avoids reusing the exception variable `e`.
        for batch_err in batch.errors:
            self.log_error(index, batch_err.id, batch_err.msg)
        return filename

    def _formulate_solrmarc_cmd(self, index, rec_filepath, commit):
        """
        Build and return the java command line (as a single string)
        that runs Solrmarc against the MARC file at `rec_filepath`.

        `index_properties` and `config_file` are read from the index
        when set (and truthy); otherwise they fall back to
        '<core>_index.properties' and settings.SOLRMARC_CONFIG_FILE.
        `commit` determines the value of -Dsolr.commit_at_end.
        """
        def_ip = '{}_index.properties'.format(self.get_core_name())
        index_properties = getattr(index, 'index_properties', None) or def_ip
        def_config = settings.SOLRMARC_CONFIG_FILE
        config_file = getattr(index, 'config_file', None) or def_config
        commit_str = 'true' if commit else 'false'
        jarfile = ('{}/../../solr/solrmarc/StanfordSearchWorksSolrMarc.jar'
                   ''.format(settings.PROJECT_DIR))
        # The config file and record file paths are double-quoted like
        # the other arguments so that `shlex.split` keeps a path that
        # contains spaces together as one argument.
        return ('java -Xmx1g -Dsolr.hosturl="{}" '
                '-Dsolrmarc.indexing.properties="{}" '
                '-Dsolr.commit_at_end="{}" '
                '-jar "{}" "{}" "{}"'
                ''.format(self.conn.url, index_properties, commit_str,
                          jarfile, config_file, rec_filepath))

    def get_core_name(self):
        """
        Return the last path segment of the connection URL, which is
        used as the Solr core name. Trailing slashes are ignored so a
        URL ending in '/' does not produce an empty name.
        """
        return self.conn.url.rstrip('/').split('/')[-1]

    def update(self, index, records, commit=False):
        """
        Index `records` by writing them to a temporary MARC file and
        running Solrmarc on that file in a subprocess.

        A non-zero exit from the subprocess is logged on the index as
        an 'ERROR' entry rather than raised. Output lines that start
        with WARN or ERROR (after stripping leading whitespace) are
        logged as 'WARNING' entries. The temporary MARC file is removed
        afterward whether or not the subprocess call succeeded.
        """
        filedir = getattr(index, 'temp_filedir', None) or settings.MEDIA_ROOT
        if not filedir.endswith('/'):
            filedir = '{}/'.format(filedir)
        rec_filename = self._records_to_marcfile(index, records)
        rec_filepath = '{}{}'.format(filedir, rec_filename)
        cmd = self._formulate_solrmarc_cmd(index, rec_filepath, commit)
        call_options = {'stderr': subprocess.STDOUT, 'shell': False,
                        'universal_newlines': True}
        try:
            try:
                output = subprocess.check_output(shlex.split(cmd),
                                                 **call_options)
            except subprocess.CalledProcessError as e:
                msg = ('Solrmarc process did not run successfully: {}'
                       ''.format(e.output))
                self.log_error(index, 'ERROR', msg)
            else:
                # With universal_newlines=True, Python 3 returns text
                # that has no `decode` method; only decode when we were
                # actually handed a byte string (as on Python 2).
                if isinstance(output, bytes):
                    output = output.decode('unicode-escape')
                # splitlines() does not drop the final line when the
                # output lacks a trailing newline.
                for line in output.splitlines():
                    line = line.lstrip()
                    if line.startswith(('WARN', 'ERROR')):
                        self.log_error(index, 'WARNING', line)
        finally:
            # Always clean up the temp file, even if launching the
            # subprocess raised something unexpected (e.g. OSError).
            os.remove(rec_filepath)
265+
266+
class SolrmarcEngine(BaseEngine):
    """
    Haystack engine that pairs the SolrmarcIndexBackend with the stock
    Solr search query class.
    """
    query = solr_backend.SolrSearchQuery
    backend = SolrmarcIndexBackend
0 commit comments