Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 32 additions & 37 deletions quickwit/quickwit-indexing/src/actors/packager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ use quickwit_common::temp_dir::TempDirectory;
use quickwit_directories::write_hotcache;
use quickwit_doc_mapper::NamedField;
use quickwit_doc_mapper::tag_pruning::append_to_tag_set;
use quickwit_proto::search::{
ListFieldType, ListFields, ListFieldsEntryResponse, serialize_split_fields,
};
use quickwit_proto::search::{ListFieldsEntry, ListFieldsMetadata, ListFieldsType};
use tantivy::index::FieldMetadata;
use tantivy::schema::{FieldType, Type};
use tantivy::{InvertedIndexReader, ReloadPolicy, SegmentMeta};
Expand Down Expand Up @@ -314,7 +312,7 @@ fn create_packaged_split(
build_hotcache(split.split_scratch_directory.path(), &mut hotcache_bytes)?;
ctx.record_progress();

let serialized_split_fields = serialize_field_metadata(&fields_metadata);
let serialized_split_fields = serialize_fields_metadata(&fields_metadata);

let packaged_split = PackagedSplit {
serialized_split_fields,
Expand All @@ -327,37 +325,19 @@ fn create_packaged_split(
Ok(packaged_split)
}

/// Serializes the Split fields.
///
/// `fields_metadata` has to be sorted.
fn serialize_field_metadata(fields_metadata: &[FieldMetadata]) -> Vec<u8> {
let fields = fields_metadata
/// Serializes the fields metadata of a split, with the entries sorted by (name, type).
fn serialize_fields_metadata(fields_metadata: &[FieldMetadata]) -> Vec<u8> {
let entries = fields_metadata
.iter()
.map(field_metadata_to_list_field_serialized)
.map(field_metadata_to_list_fields_entry)
.sorted_unstable_by(|left, right| left.cmp_by_name_and_type(right))
.collect::<Vec<_>>();

serialize_split_fields(ListFields { fields })
}

fn tantivy_type_to_list_field_type(typ: Type) -> ListFieldType {
match typ {
Type::Str => ListFieldType::Str,
Type::U64 => ListFieldType::U64,
Type::I64 => ListFieldType::I64,
Type::F64 => ListFieldType::F64,
Type::Bool => ListFieldType::Bool,
Type::Date => ListFieldType::Date,
Type::Facet => ListFieldType::Facet,
Type::Bytes => ListFieldType::Bytes,
Type::Json => ListFieldType::Json,
Type::IpAddr => ListFieldType::IpAddr,
}
ListFieldsMetadata { entries }.serialize()
}

fn field_metadata_to_list_field_serialized(
field_metadata: &FieldMetadata,
) -> ListFieldsEntryResponse {
ListFieldsEntryResponse {
fn field_metadata_to_list_fields_entry(field_metadata: &FieldMetadata) -> ListFieldsEntry {
ListFieldsEntry {
field_name: field_metadata.field_name.to_string(),
field_type: tantivy_type_to_list_field_type(field_metadata.typ) as i32,
searchable: field_metadata.is_indexed(),
Expand All @@ -368,6 +348,21 @@ fn field_metadata_to_list_field_serialized(
}
}

/// Converts a tantivy [`Type`] into the corresponding [`ListFieldsType`] variant.
///
/// Every `Type` variant is mapped explicitly to the identically named
/// `ListFieldsType` variant; there is no wildcard arm.
fn tantivy_type_to_list_field_type(typ: Type) -> ListFieldsType {
    match typ {
        Type::Str => ListFieldsType::Str,
        Type::U64 => ListFieldsType::U64,
        Type::I64 => ListFieldsType::I64,
        Type::F64 => ListFieldsType::F64,
        Type::Bool => ListFieldsType::Bool,
        Type::Date => ListFieldsType::Date,
        Type::Facet => ListFieldsType::Facet,
        Type::Bytes => ListFieldsType::Bytes,
        Type::IpAddr => ListFieldsType::IpAddr,
        Type::Json => ListFieldsType::Json,
    }
}

/// Reads u64 from stored term data.
fn u64_from_term_data(data: &[u8]) -> anyhow::Result<u64> {
let u64_bytes: [u8; 8] = data[0..8]
Expand All @@ -382,7 +377,7 @@ mod tests {

use quickwit_actors::{ObservationType, Universe};
use quickwit_metastore::checkpoint::IndexCheckpointDelta;
use quickwit_proto::search::{ListFieldsEntryResponse, deserialize_split_fields};
use quickwit_proto::search::{ListFieldsEntry, ListFieldsMetadata};
use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId};
use tantivy::directory::MmapDirectory;
use tantivy::schema::{FAST, NumericOptions, STRING, Schema, TEXT, Type};
Expand Down Expand Up @@ -424,24 +419,24 @@ mod tests {
},
];

let out = serialize_field_metadata(&fields_metadata);
let out = serialize_fields_metadata(&fields_metadata);

let deserialized: Vec<ListFieldsEntryResponse> =
deserialize_split_fields(&mut &out[..]).unwrap().fields;
let deserialized: Vec<ListFieldsEntry> =
ListFieldsMetadata::deserialize(&out[..]).unwrap().entries;

assert_eq!(fields_metadata.len(), deserialized.len());
assert_eq!(deserialized[0].field_name, "test");
assert_eq!(deserialized[0].field_type, ListFieldType::Str as i32);
assert_eq!(deserialized[0].field_type, ListFieldsType::Str as i32);
assert!(deserialized[0].searchable);
assert!(deserialized[0].aggregatable);

assert_eq!(deserialized[1].field_name, "test2");
assert_eq!(deserialized[1].field_type, ListFieldType::Str as i32);
assert_eq!(deserialized[1].field_type, ListFieldsType::Str as i32);
assert!(deserialized[1].searchable);
assert!(!deserialized[1].aggregatable);

assert_eq!(deserialized[2].field_name, "test3");
assert_eq!(deserialized[2].field_type, ListFieldType::U64 as i32);
assert_eq!(deserialized[2].field_type, ListFieldsType::U64 as i32);
assert!(deserialized[2].searchable);
assert!(deserialized[2].aggregatable);
}
Expand Down
23 changes: 14 additions & 9 deletions quickwit/quickwit-proto/protos/quickwit/search.proto
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ message ListFieldsRequest {
repeated string index_id_patterns = 1;
// Optional: restricts the query to the listed fields.
// Wildcard expressions are supported.
repeated string fields = 2;
repeated string field_patterns = 2;

// Time filter, expressed in seconds since epoch.
// That filter is to be interpreted as the semi-open interval:
Expand All @@ -144,16 +144,23 @@ message LeafListFieldsRequest {

// Optional: restricts the query to the listed fields.
// Wildcard expressions are supported.
repeated string fields = 4;
repeated string field_patterns = 4;
}

/// Message returned by leaf and root list fields requests.
message ListFieldsResponse {
repeated ListFieldsEntryResponse fields = 1;
repeated ListFieldsEntry entries = 1;
}

message ListFieldsEntryResponse {
/// Message containing the fields metadata for a split, sorted by (name, type)
/// and stored zstd-compressed in the split. Currently a duplicate of
/// ListFieldsResponse, but kept distinct so the two can evolve independently.
message ListFieldsMetadata {
repeated ListFieldsEntry entries = 1;
}

message ListFieldsEntry {
string field_name = 1;
ListFieldType field_type = 2;
ListFieldsType field_type = 2;
// The ids of the indexes in which the field exists.
repeated string index_ids = 3;
// True means the field is searchable (indexed) in at least some indices.
Expand All @@ -168,7 +175,7 @@ message ListFieldsEntryResponse {
repeated string non_aggregatable_index_ids = 7;
}

enum ListFieldType {
enum ListFieldsType {
STR = 0;
U64 = 1;
I64 = 2;
Expand All @@ -180,9 +187,7 @@ enum ListFieldType {
IP_ADDR = 8;
JSON = 9;
}
message ListFields {
repeated ListFieldsEntryResponse fields = 1;
}

// -- Search -------------------

message SearchRequest {
Expand Down
29 changes: 16 additions & 13 deletions quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading