Skip to content

Commit 83b37c8

Browse files
authored
Merge pull request #3179 from pulibrary/contains-title-index
Rewrite contains_title_index in Rust
2 parents 0b5e997 + 8d27ef2 commit 83b37c8

5 files changed

Lines changed: 51 additions & 4 deletions

File tree

lib/bibdata_rs/src/marc/ruby_bindings.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ fn solr_fields(ruby: &Ruby, record_string: String) -> Result<RHash, magnus::Erro
102102
.ok()
103103
.and_then(|date| date.maybe_to_string());
104104

105-
let hash = ruby.hash_new_capa(35);
105+
let hash = ruby.hash_new_capa(36);
106106
hash.aset("aat_s", ruby.ary_from_iter(genre::aat_s(&record)))?;
107107
hash.aset("action_notes_1display", action_notes_1display)?;
108108
hash.aset("access_restrictions_note_display", access_notes(&record))?;
@@ -128,6 +128,7 @@ fn solr_fields(ruby: &Ruby, record_string: String) -> Result<RHash, magnus::Erro
128128
ruby.ary_from_iter(cjk::subjects_cjk(&record)),
129129
)?;
130130
hash.aset("cjk_title", ruby.ary_from_iter(cjk::cjk_titles(&record)))?;
131+
hash.aset("contains_title_index", ruby.ary_from_iter(title::contains_titles_index(&record)))?;
131132
hash.aset(
132133
"fast_subject_display",
133134
ruby.ary_from_iter(subject::fast_subjects(&record)),

lib/bibdata_rs/src/marc/string_normalize.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,13 @@ pub fn upcase_first(string: &str) -> Cow<'_, str> {
5656
}
5757
}
5858

59+
/// Wraps `s` in `Some` unless its string view is empty.
///
/// Accepts anything that can be viewed as a `&str` (for example `&str` or
/// `String`) and hands the very same value back, so no conversion or copy of
/// the text takes place.
///
/// Returns `None` only when `s.as_ref()` has length zero; strings that consist
/// solely of whitespace are still returned as `Some`.
pub fn maybe_not_empty<S>(s: S) -> Option<S>
where
    S: AsRef<str>,
{
    Some(s).filter(|candidate| !candidate.as_ref().is_empty())
}
65+
5966
#[cfg(test)]
6067
mod tests {
6168
use super::*;

lib/bibdata_rs/src/marc/title.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,27 @@
1-
use crate::marc::extract_values::ExtractValues;
1+
use crate::marc::{
2+
extract_values::ExtractValues,
3+
string_normalize::maybe_not_empty,
4+
trim_punctuation,
5+
variable_length_field::{SubfieldIterator, latin_or_non_latin_tag_included_in},
6+
};
27
use itertools::Itertools;
38
use marctk::Record;
49

10+
pub fn contains_titles_index(record: &Record) -> impl Iterator<Item = String> {
11+
record.extract_field_values_by(
12+
latin_or_non_latin_tag_included_in(&["700", "710", "711"]),
13+
|field| {
14+
maybe_not_empty(trim_punctuation(
15+
&field
16+
.subfields()
17+
.iter()
18+
.subfields_after("t")
19+
.join(" "),
20+
))
21+
},
22+
)
23+
}
24+
525
pub fn latin_script_title(record: &Record) -> Option<String> {
626
record
727
.extract_field_values_by(
@@ -19,3 +39,18 @@ pub fn latin_script_title(record: &Record) -> Option<String> {
1939
)
2040
.next()
2141
}
42+
43+
#[cfg(test)]
mod tests {
    use super::*;

    /// A 700 field and its linked 880 field each contribute their `$t` title,
    /// in that order, with the trailing period removed, and nothing else is
    /// produced.
    #[test]
    fn it_can_find_contains_titles_index() {
        let breaker = r#"=700 12$6880-12$aDawwānī, Muḥammad ibn Asʻad, $d 1426 or 1427-1512 or 1513. $t Zawrāʼ.
=880 12$6700-12$aدواني، محمد بن اسعد, $d 1426 or 1427-1512 or 1513. $t زوراء."#;
        let record = Record::from_breaker(breaker).unwrap();

        // Collect the whole sequence so order and length are checked together.
        let found: Vec<String> = contains_titles_index(&record).collect();
        let expected = vec![String::from("Zawrāʼ"), String::from("زوراء")];
        assert_eq!(found, expected);
    }
}

lib/bibdata_rs/src/marc/variable_length_field.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use marctk::{Field, Subfield};
33

44
pub trait SubfieldIterator<'a>: Iterator<Item = &'a Subfield> {
55
fn content(self) -> impl Iterator<Item = &'a str>;
6+
fn join(self, delimiter: &'a str) -> String;
67
fn filter_by_code(self, codes: &'a [&str]) -> impl Iterator<Item = &'a Subfield>;
78
fn subfields_before(self, stop_before: &'a str) -> impl Iterator<Item = &'a Subfield>;
89
fn subfields_after(self, start_at: &'a str) -> impl Iterator<Item = &'a Subfield>;
@@ -14,6 +15,9 @@ where
1415
fn content(self) -> impl Iterator<Item = &'a str> {
1516
self.map(|subfield| subfield.content())
1617
}
18+
/// Consumes the iterator and returns the content of every subfield it
/// yields, in order, separated by `delimiter`.
fn join(self, delimiter: &'a str) -> String {
    // `content()` turns the subfields into their string contents; joining
    // happens on that string iterator rather than on the subfields.
    let mut contents = self.content();
    contents.join(delimiter)
}
1721
fn filter_by_code(self, codes: &'a [&str]) -> impl Iterator<Item = &'a Subfield> {
1822
self.filter(move |subfield| codes.contains(&subfield.code()))
1923
}

marc_to_solr/lib/traject_config.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,8 @@
221221

222222
to_field 'content_title_index', extract_marc('505t')
223223

224-
to_field 'contains_title_index' do |record, accumulator|
225-
accumulator.replace(everything_after_t(record, '700:710:711'))
224+
to_field 'contains_title_index' do |_record, accumulator, context|
225+
accumulator.replace(context.clipboard[:solr_fields]['contains_title_index'])
226226
end
227227

228228
to_field 'linked_title_index', extract_marc('765st:767st:770st:772st:773st:774st:775st:776st:777st:780st:785st:786st:787st')

0 commit comments

Comments
 (0)