Skip to content

Commit f4b7a20

Browse files
committed
feature: redo python api
* matchers can be combined using (`~`, `|`, `&`) * depth matcher can be used * a bigger refactor of the matching logic
1 parent 2e13243 commit f4b7a20

4 files changed

Lines changed: 192 additions & 30 deletions

File tree

src/lib.rs

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,37 +6,104 @@ use std::sync::{Arc, Mutex};
66
use streamson_lib::{error, handler, matcher, Collector};
77

88
create_exception!(streamson, StreamsonError, exceptions::ValueError);
9+
create_exception!(streamson, MatcherUsed, exceptions::RuntimeError);
910

1011
impl From<error::General> for StreamsonError {
1112
fn from(_gerror: error::General) -> Self {
1213
Self
1314
}
1415
}
1516

17+
/// Python wrapper around matchers
18+
#[pyclass]
19+
#[derive(Debug)]
20+
pub struct RustMatcher {
21+
inner: Option<matcher::Combinator>,
22+
}
23+
24+
#[pymethods]
25+
impl RustMatcher {
26+
/// Create a new instance of simple matcher
27+
///
28+
/// # Arguments
29+
/// * `path` - path to match
30+
///   (simple-match syntax, e.g. `{"users"}[]`; this constructor takes no depth argument)
31+
#[staticmethod]
32+
pub fn simple(path: String) -> PyResult<Self> {
33+
Ok(Self {
34+
inner: Some(matcher::Combinator::new(matcher::Simple::new(path))),
35+
})
36+
}
37+
38+
/// Create a new instance of depth matcher
39+
///
40+
/// # Arguments
41+
/// * `min_depth` - min depth
42+
/// * `max_depth` - max depth (Optional)
43+
#[staticmethod]
44+
pub fn depth(min_depth: usize, max_depth: Option<usize>) -> PyResult<Self> {
45+
Ok(Self {
46+
inner: Some(matcher::Combinator::new(matcher::Depth::new(
47+
min_depth, max_depth,
48+
))),
49+
})
50+
}
51+
52+
pub fn inv(&mut self) -> PyResult<Self> {
53+
if let Some(inner) = self.inner.take() {
54+
Ok(Self {
55+
inner: Some(!inner),
56+
})
57+
} else {
58+
Err(MatcherUsed.into())
59+
}
60+
}
61+
62+
pub fn any(&mut self, right: &mut RustMatcher) -> PyResult<Self> {
63+
if let (Some(left), Some(right)) = (self.inner.take(), right.inner.take()) {
64+
Ok(Self {
65+
inner: Some(left | right),
66+
})
67+
} else {
68+
Err(MatcherUsed.into())
69+
}
70+
}
71+
72+
pub fn all(&mut self, right: &mut RustMatcher) -> PyResult<Self> {
73+
if let (Some(left), Some(right)) = (self.inner.take(), right.inner.take()) {
74+
Ok(Self {
75+
inner: Some(left & right),
76+
})
77+
} else {
78+
Err(MatcherUsed.into())
79+
}
80+
}
81+
82+
#[getter]
83+
pub fn _used(&self) -> bool {
84+
self.inner.is_none()
85+
}
86+
}
87+
1688
/// Low level Python wrapper around a Collector fed by a RustMatcher and a Buffer handler
1789
#[pyclass]
18-
pub struct SimpleStreamson {
90+
pub struct Streamson {
1991
collector: Collector,
2092
handler: Arc<Mutex<handler::Buffer>>,
2193
}
2294

2395
#[pymethods]
24-
impl SimpleStreamson {
25-
/// Create a new instance of SimpleStreamson
96+
impl Streamson {
97+
/// Create a new instance of Streamson
2698
///
2799
/// # Arguments
28100
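/// * `matcher` - matcher selecting the data to extract (e.g. one built from a simple match such as `{"users"}`, `[]{"name"}`, `[0]{}`); it is consumed here, so passing an already used matcher raises `MatcherUsed`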
29101
#[new]
30-
pub fn new(matches: Vec<String>) -> Self {
102+
pub fn new(matcher: &mut RustMatcher) -> PyResult<Self> {
31103
let handler = Arc::new(Mutex::new(handler::Buffer::new()));
32-
let mut collector = Collector::new();
33-
for path_match in matches {
34-
collector = collector.add_matcher(
35-
Box::new(matcher::Simple::new(path_match)),
36-
&[handler.clone()],
37-
);
38-
}
39-
Self { collector, handler }
104+
let matcher = matcher.inner.take().ok_or(MatcherUsed)?;
105+
let collector = Collector::new().add_matcher(Box::new(matcher), &[handler.clone()]);
106+
Ok(Self { collector, handler })
40107
}
41108

42109
/// Feeds Streamson processor with data
@@ -66,7 +133,8 @@ impl SimpleStreamson {
66133
/// This module is a python module implemented in Rust.
67134
#[pymodule]
68135
fn streamson(_py: Python, m: &PyModule) -> PyResult<()> {
69-
m.add_class::<SimpleStreamson>()?;
136+
m.add_class::<Streamson>()?;
137+
m.add_class::<RustMatcher>()?;
70138

71139
Ok(())
72140
}

streamson/__init__.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,47 @@
22
import typing
33
from array import array
44

5-
from streamson.streamson import SimpleStreamson as _SimpleStreamson
5+
from streamson.streamson import RustMatcher as _RustMatcher
6+
from streamson.streamson import Streamson as _Streamson
7+
8+
9+
class Matcher:
10+
def __init__(self, rust_matcher: _RustMatcher):
11+
self.inner = rust_matcher
12+
13+
def __invert__(self):
14+
self.inner = self.inner.inv()
15+
return self
16+
17+
def __or__(self, other):
18+
self.inner = self.inner.any(other.inner)
19+
return self
20+
21+
def __and__(self, other):
22+
self.inner = self.inner.all(other.inner)
23+
return self
24+
25+
26+
class DepthMatcher(Matcher):
27+
def __init__(self, min_depth: int, max_depth: typing.Optional[int] = None):
28+
super().__init__(_RustMatcher.depth(min_depth, max_depth))
29+
30+
31+
class SimpleMatcher(Matcher):
32+
def __init__(self, path: str):
33+
super().__init__(_RustMatcher.simple(path))
634

735

836
def extract_iter(
9-
input_gen: typing.Generator[bytes, None, None], simple_matches: typing.List[str],
37+
input_gen: typing.Generator[bytes, None, None], matcher: Matcher,
1038
) -> typing.Generator[typing.Tuple[str, typing.Any], None, None]:
1139
""" Extracts json specified by the given matcher
1240
:param: input_gen - input generator
1341
:param: matcher - matcher which selects the data to extract
1442
1543
:returns: (string, data) generator
1644
"""
17-
streamson = _SimpleStreamson(simple_matches)
45+
streamson = _Streamson(matcher.inner)
1846
for item in input_gen:
1947
streamson.feed(item)
2048
res = streamson.pop()

tests/test_basic.py

Lines changed: 0 additions & 14 deletions
This file was deleted.

tests/test_extract.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import pytest
2+
3+
import streamson
4+
5+
DATA = [b'{"users": ["john","carl","bob"]}']
6+
7+
8+
def test_simple():
9+
matcher = streamson.SimpleMatcher('{"users"}[]')
10+
extracted = streamson.extract_iter((e for e in DATA), matcher)
11+
assert next(extracted) == ('{"users"}[0]', "john")
12+
assert next(extracted) == ('{"users"}[1]', "carl")
13+
assert next(extracted) == ('{"users"}[2]', "bob")
14+
15+
with pytest.raises(StopIteration):
16+
next(extracted)
17+
18+
19+
def test_depth():
20+
matcher = streamson.DepthMatcher(1)
21+
extracted = streamson.extract_iter((e for e in DATA), matcher)
22+
assert next(extracted) == ('{"users"}[0]', "john")
23+
assert next(extracted) == ('{"users"}[1]', "carl")
24+
assert next(extracted) == ('{"users"}[2]', "bob")
25+
assert next(extracted) == ('{"users"}', ["john", "carl", "bob"])
26+
27+
with pytest.raises(StopIteration):
28+
next(extracted)
29+
30+
matcher = streamson.DepthMatcher(0, 1)
31+
extracted = streamson.extract_iter((e for e in DATA), matcher)
32+
assert next(extracted) == ('{"users"}', ["john", "carl", "bob"])
33+
assert next(extracted) == ("", {"users": ["john", "carl", "bob"]})
34+
35+
with pytest.raises(StopIteration):
36+
next(extracted)
37+
38+
39+
def test_invert():
40+
matcher = ~streamson.DepthMatcher(2)
41+
extracted = streamson.extract_iter((e for e in DATA), matcher)
42+
assert next(extracted) == ('{"users"}', ["john", "carl", "bob"])
43+
assert next(extracted) == ("", {"users": ["john", "carl", "bob"]})
44+
45+
46+
def test_all():
47+
matcher = streamson.SimpleMatcher('{"users"}[]') & streamson.SimpleMatcher("{}[1]")
48+
49+
extracted = streamson.extract_iter((e for e in DATA), matcher)
50+
assert next(extracted) == ('{"users"}[1]', "carl")
51+
52+
with pytest.raises(StopIteration):
53+
next(extracted)
54+
55+
56+
def test_any():
57+
matcher = streamson.DepthMatcher(2, 2) | streamson.SimpleMatcher('{"users"}')
58+
59+
extracted = streamson.extract_iter((e for e in DATA), matcher)
60+
assert next(extracted) == ('{"users"}[0]', "john")
61+
assert next(extracted) == ('{"users"}[1]', "carl")
62+
assert next(extracted) == ('{"users"}[2]', "bob")
63+
assert next(extracted) == ('{"users"}', ["john", "carl", "bob"])
64+
65+
with pytest.raises(StopIteration):
66+
next(extracted)
67+
68+
69+
def test_complex():
70+
matcher = (streamson.DepthMatcher(2, 2) | streamson.SimpleMatcher('{"users"}')) & ~streamson.SimpleMatcher(
71+
'{"users"}[0]'
72+
)
73+
74+
extracted = streamson.extract_iter((e for e in DATA), matcher)
75+
assert next(extracted) == ('{"users"}[1]', "carl")
76+
assert next(extracted) == ('{"users"}[2]', "bob")
77+
assert next(extracted) == ('{"users"}', ["john", "carl", "bob"])
78+
79+
with pytest.raises(StopIteration):
80+
next(extracted)

0 commit comments

Comments
 (0)