Skip to content

Commit 7a22b09

Browse files
committed
feature: initial implementation
1 parent f18ed61 commit 7a22b09

8 files changed

Lines changed: 183 additions & 0 deletions

File tree

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
**/target
2+
Cargo.lock
3+
*.pyc
4+
*.swo
5+
*.swp

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
0.1.0 (XXXX-XX-XX)
2+
------------------
3+
4+
* extract_iter function implemented
5+
* initial version

Cargo.toml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[package]
2+
name = "streamson-python"
3+
version = "0.1.0"
4+
authors = ["Stepan Henek"]
5+
edition = "2018"
6+
description = "Python wrappers around streamson"
7+
license = "MIT"
8+
readme = "README.md"
9+
keywords = ["json", "python", "splitter"]
10+
repository = "https://github.com/shenek/python-streamson"
11+
categories = ["parsing"]
12+
13+
[lib]
14+
name = "streamson"
15+
crate-type = ["cdylib"]
16+
17+
[package.metadata.maturin]
18+
classifier = ["Programming Language :: Python"]
19+
20+
[dependencies]
21+
pyo3 = { version = "0.10", features = ["extension-module"] }
22+
streamson-lib = "0.2.0"

LICENSE

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Copyright (c) 2020 Stepan Henek
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy
4+
of this software and associated documentation files (the "Software"), to deal
5+
in the Software without restriction, including without limitation the rights
6+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
copies of the Software, and to permit persons to whom the Software is
8+
furnished to do so, subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in all
11+
copies or substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19+
SOFTWARE.

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Python streamson
2+
3+
Python bindings for streamson, a memory-efficient JSON splitter.
4+
5+
## Installation
6+
TODO
7+
8+
## Examples
9+
### Simple
10+
```python
11+
>>> import streamson
12+
>>> data = [b'{"users": ["john","carl","bob"]}']
13+
>>> extracted = streamson.extract_iter((e for e in data), ['{"users"}[]'])
14+
>>> for path, parsed in extracted:
15+
... path, parsed
16+
...
17+
('{"users"}[0]', 'john')
18+
('{"users"}[1]', 'carl')
19+
('{"users"}[2]', 'bob')
20+
```

pyproject.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[build-system]
2+
requires = ["maturin"]
3+
build-backend = "maturin"
4+
5+
[tool.maturin]
6+
bindings = "pyo3"
7+
manylinux = "2014"

src/lib.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
use pyo3::prelude::*;
2+
use pyo3::create_exception;
3+
use pyo3::wrap_pyfunction;
4+
use pyo3::types::{PyBytes, PyTuple};
5+
use pyo3::exceptions;
6+
7+
use std::sync::{Arc, Mutex};
8+
use streamson_lib::{error, handler, matcher, Collector};
9+
10+
// Declares the `StreamsonError` exception for the `streamson` module,
// using `exceptions::ValueError` as its base.
create_exception!(streamson, StreamsonError, exceptions::ValueError);
11+
12+
impl From<error::General> for StreamsonError {
13+
fn from(gerror: error::General) -> Self {
14+
Self
15+
}
16+
}
17+
18+
/// Low level Python wrapper for Simple matcher and Buffer handler
19+
#[pyclass]
20+
pub struct SimpleStreamson {
21+
collector: Collector,
22+
handler: Arc<Mutex<handler::Buffer>>,
23+
}
24+
25+
#[pymethods]
26+
impl SimpleStreamson {
27+
28+
/// Create a new instance of SimpleStreamson
29+
///
30+
/// # Arguments
31+
/// * `matches` - a list of valid simple matches (e.g. `{"users"}`, `[]{"name"}`, `[0]{}`)
32+
#[new]
33+
pub fn new(matches: Vec<String>) -> Self {
34+
let handler = Arc::new(Mutex::new(handler::Buffer::new()));
35+
let mut collector = Collector::new();
36+
for path_match in matches {
37+
collector = collector.add_matcher(
38+
Box::new(matcher::Simple::new(path_match)),
39+
&[handler.clone()],
40+
);
41+
}
42+
Self { collector, handler }
43+
}
44+
45+
/// Feeds Streamson processor with data
46+
///
47+
/// # Arguments
48+
/// * `data` - input data to be processed
49+
pub fn feed(&mut self, data: &[u8]) -> PyResult<()> {
50+
if let Err(err) = self.collector.process(data) {
51+
Err(StreamsonError::from(err).into())
52+
} else {
53+
Ok(())
54+
}
55+
}
56+
57+
/// Reads data from Buffer handler
58+
///
59+
/// # Returns
60+
/// * `None` - if no data present
61+
/// * `Some(<path>, <bytes>)` if there are some data
62+
fn pop(&mut self) -> Option<(String, Vec<u8>)>{
63+
64+
match self.handler.lock().unwrap().pop() {
65+
Some((path, bytes)) => {
66+
Some((path, bytes.to_vec()))
67+
},
68+
None => None,
69+
}
70+
71+
}
72+
}
73+
/// This module is a python module implemented in Rust.
74+
#[pymodule]
75+
fn streamson(py: Python, m: &PyModule) -> PyResult<()> {
76+
m.add_class::<SimpleStreamson>()?;
77+
78+
Ok(())
79+
}

streamson/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import json
2+
import typing
3+
4+
from array import array
5+
6+
from streamson.streamson import SimpleStreamson as _SimpleStreamson
7+
8+
9+
def extract_iter(
    input_gen: typing.Iterable[bytes],
    simple_matches: typing.List[str],
) -> typing.Generator[typing.Tuple[str, typing.Any], None, None]:
    """Extract JSON values selected by the given list of simple matches.

    :param input_gen: iterable (e.g. a generator) yielding the input as bytes chunks
    :param simple_matches: matches to check

    :returns: (string, data) generator - matched path and the parsed JSON value
    """
    extractor = _SimpleStreamson(simple_matches)
    for chunk in input_gen:
        extractor.feed(chunk)
        # Drain everything matched so far; pop() returns None once nothing is left.
        for path, data in iter(extractor.pop, None):
            yield path, json.loads(bytes(data))

0 commit comments

Comments
 (0)