Skip to content

Commit ac9394e

Browse files
mgautierfr authored and rgaudin committed
Wrap custom IndexData
1 parent fdf6277 commit ac9394e

5 files changed

Lines changed: 227 additions & 11 deletions

File tree

libzim/libwrapper.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ ObjWrapper::~ObjWrapper()
7171
template<typename Output>
7272
Output _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error);
7373

74+
template<>
75+
bool _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
76+
return bool_cy_call_fct(obj, methodName, &error);
77+
}
78+
7479
template<>
7580
std::string _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
7681
return string_cy_call_fct(obj, methodName, &error);
@@ -81,6 +86,11 @@ uint64_t _callMethodOnObj(PyObject *obj, const std::string& methodName, std::str
8186
return uint64_cy_call_fct(obj, methodName, &error);
8287
}
8388

89+
template<>
90+
uint32_t _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
91+
return uint32_cy_call_fct(obj, methodName, &error);
92+
}
93+
8494
template<>
8595
zim::Blob _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
8696
return blob_cy_call_fct(obj, methodName, &error);
@@ -92,12 +102,24 @@ _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& erro
92102
return std::unique_ptr<zim::writer::ContentProvider>(contentprovider_cy_call_fct(obj, methodName, &error));
93103
}
94104

105+
template<>
106+
std::shared_ptr<zim::writer::IndexData>
107+
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
108+
return std::shared_ptr<zim::writer::IndexData>(indexdata_cy_call_fct(obj, methodName, &error));
109+
}
110+
95111
template<>
96112
zim::writer::Hints
97113
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
98114
return hints_cy_call_fct(obj, methodName, &error);
99115
}
100116

117+
template<>
118+
zim::writer::IndexData::GeoPosition
119+
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
120+
return geoposition_cy_call_fct(obj, methodName, &error);
121+
}
122+
101123
// This cpp function call a python method on a python object.
102124
// It checks that we are in a valid state and handle any potential error coming from python.
103125
template<typename Output>
@@ -130,6 +152,44 @@ zim::Blob ContentProviderWrapper::feed()
130152
return callMethodOnObj<zim::Blob>(m_obj, "feed");
131153
}
132154

155+
156+
/*
157+
################################
158+
# Index Data Wrapper #
159+
################################
160+
*/
161+
162+
bool IndexDataWrapper::hasIndexData() const
163+
{
164+
return callMethodOnObj<bool>(m_obj, "has_indexdata");
165+
}
166+
167+
std::string IndexDataWrapper::getTitle() const
168+
{
169+
return callMethodOnObj<std::string>(m_obj, "get_title");
170+
}
171+
172+
std::string IndexDataWrapper::getContent() const
173+
{
174+
return callMethodOnObj<std::string>(m_obj, "get_content");
175+
}
176+
177+
std::string IndexDataWrapper::getKeywords() const
178+
{
179+
return callMethodOnObj<std::string>(m_obj, "get_keywords");
180+
}
181+
182+
uint32_t IndexDataWrapper::getWordCount() const
183+
{
184+
return callMethodOnObj<std::uint32_t>(m_obj, "get_wordcount");
185+
}
186+
187+
// Returns the geographic position reported by the Python method
// `get_geoposition`.
zim::writer::IndexData::GeoPosition IndexDataWrapper::getGeoPosition() const
{
  using GeoPosition = zim::writer::IndexData::GeoPosition;
  GeoPosition position = callMethodOnObj<GeoPosition>(m_obj, "get_geoposition");
  return position;
}
191+
192+
133193
/*
134194
#########################
135195
# WriterItem #
@@ -161,6 +221,15 @@ WriterItemWrapper::getContentProvider() const
161221
return callMethodOnObj<std::unique_ptr<zim::writer::ContentProvider>>(m_obj, "get_contentprovider");
162222
}
163223

224+
std::shared_ptr<zim::writer::IndexData>
225+
WriterItemWrapper::getIndexData() const
226+
{
227+
if (!obj_has_attribute(m_obj, "get_indexdata")) {
228+
return zim::writer::Item::getIndexData();
229+
}
230+
return callMethodOnObj<std::shared_ptr<zim::writer::IndexData>>(m_obj, "get_indexdata");
231+
}
232+
164233
zim::writer::Hints WriterItemWrapper::getHints() const
165234
{
166235
return callMethodOnObj<zim::writer::Hints>(m_obj, "get_hints");

libzim/libwrapper.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ class WriterItemWrapper : public zim::writer::Item, private ObjWrapper
305305
std::string getTitle() const override;
306306
std::string getMimeType() const override;
307307
std::unique_ptr<zim::writer::ContentProvider> getContentProvider() const override;
308+
std::shared_ptr<zim::writer::IndexData> getIndexData() const override;
308309
zim::writer::Hints getHints() const override;
309310
};
310311

@@ -317,6 +318,19 @@ class ContentProviderWrapper : public zim::writer::ContentProvider, private ObjW
317318
zim::Blob feed() override;
318319
};
319320

321+
// Adapter exposing a Python object as a zim::writer::IndexData.
// Every virtual method is forwarded to the Python method of the matching
// name (has_indexdata, get_title, get_content, get_keywords, get_wordcount,
// get_geoposition) on the wrapped object.
class IndexDataWrapper : public zim::writer::IndexData, private ObjWrapper
{
  public:
    // `explicit`: a bare PyObject* must never convert silently into a wrapper.
    explicit IndexDataWrapper(PyObject *obj) : ObjWrapper(obj) {}
    ~IndexDataWrapper() = default;
    bool hasIndexData() const override;
    std::string getTitle() const override;
    std::string getContent() const override;
    std::string getKeywords() const override;
    uint32_t getWordCount() const override;
    IndexData::GeoPosition getGeoPosition() const override;
};
333+
320334

321335
// Small helpers
322336

libzim/libzim.pyx

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ import pathlib
4040
import sys
4141
import traceback
4242
from types import ModuleType
43-
from typing import Dict, Generator, Iterator, List, Set, Union
43+
from typing import Dict, Generator, Iterator, List, Optional, Set, Tuple, Union
4444
from uuid import UUID
4545

4646
from cpython.buffer cimport PyBUF_WRITABLE
4747
from cpython.ref cimport PyObject
4848

4949
from cython.operator import preincrement
5050

51-
from libc.stdint cimport uint64_t
51+
from libc.stdint cimport uint32_t, uint64_t
5252
from libcpp cimport bool
5353
from libcpp.map cimport map
5454
from libcpp.memory cimport shared_ptr
@@ -87,6 +87,13 @@ cdef object call_method(object obj, string method):
8787
# object to the correct cpp type.
8888
# Will be used by cpp side to call python method.
8989
cdef public api:
90+
bool obj_has_attribute(object obj, string attribute) with gil:
    """Check if an object has a given (truthy) attribute

    A missing attribute, or one whose value is falsy (None for instance),
    is reported as absent."""
    attribute_name = attribute.decode('UTF-8')
    found = getattr(obj, attribute_name, None)
    return True if found else False
96+
9097
string string_cy_call_fct(object obj, string method, string *error) with gil:
9198
"""Lookup and execute a pure virtual method on object returning a string"""
9299
try:
@@ -122,15 +129,27 @@ cdef public api:
122129

123130
return NULL
124131

125-
# currently have no virtual method returning a bool (was should_index/compress)
126-
# bool bool_cy_call_fct(object obj, string method, string *error) with gil:
127-
# """Lookup and execute a pure virtual method on object returning a bool"""
128-
# try:
129-
# func = getattr(obj, method.decode('UTF-8'))
130-
# return func()
131-
# except Exception as e:
132-
# error[0] = traceback.format_exc().encode('UTF-8')
133-
# return False
132+
zim.IndexData* indexdata_cy_call_fct(object obj, string method, string *error) with gil:
    """Lookup and execute a pure virtual method on object returning an IndexData

    Returns NULL when the Python method returned None, or when it raised
    (in which case the formatted traceback is stored in `error`)."""
    try:
        indexData = call_method(obj, method)
        # Only None means "no index data". A truthiness test would also
        # discard valid objects that happen to be falsy (for instance ones
        # defining __len__ or __bool__).
        if indexData is None:
            return NULL
        return new zim.IndexDataWrapper(<PyObject*>indexData)
    except Exception as e:
        error[0] = traceback.format_exc().encode('UTF-8')

    return NULL
144+
145+
bool bool_cy_call_fct(object obj, string method, string *error) with gil:
    """Lookup and execute a pure virtual method on object returning a bool

    On exception, the formatted traceback is stored in `error` and False
    is returned."""
    cdef bool outcome = False
    try:
        outcome = call_method(obj, method)
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')
    return outcome
134153

135154
uint64_t uint64_cy_call_fct(object obj, string method, string *error) with gil:
136155
"""Lookup and execute a pure virtual method on object returning an uint64_t"""
@@ -141,6 +160,26 @@ cdef public api:
141160

142161
return 0
143162

163+
uint32_t uint32_cy_call_fct(object obj, string method, string *error) with gil:
    """Lookup and execute a pure virtual method on object returning an uint32_t

    On exception, the formatted traceback is stored in `error` and 0
    is returned."""
    cdef uint32_t converted = 0
    try:
        converted = <uint32_t> call_method(obj, method)
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')
    return converted
171+
172+
zim.GeoPosition geoposition_cy_call_fct(object obj, string method, string *error) with gil:
    """Lookup and execute a pure virtual method on object returning a GeoPosition

    A truthy result is read as (result[0], result[1]) and flagged as valid.
    A falsy result (None for instance) or an exception yields an invalid
    (False, 0, 0) position; on exception the traceback is stored in `error`."""
    try:
        coordinates = call_method(obj, method)
        if coordinates:
            first = coordinates[0]
            second = coordinates[1]
            return zim.GeoPosition(True, first, second)
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return zim.GeoPosition(False, 0, 0)
182+
144183
map[zim.HintKeys, uint64_t] convertToCppHints(dict hintsDict):
145184
"""C++ Hints from Python dict"""
146185
cdef map[zim.HintKeys, uint64_t] ret;
@@ -439,6 +478,40 @@ class FileProvider(ContentProvider):
439478
yield WritingBlob(res)
440479
res = fh.read(bsize)
441480

481+
class IndexData:
    """Base class describing the custom data used to index an Item

    Subclass it and return an instance from Item.get_indexdata()"""

    __module__ = writer_module_name

    def has_indexdata(self) -> bool:
        """Whether this IndexData actually holds data to index"""
        return False

    def get_title(self) -> str:
        """Title to index. May or may not match Item.get_title"""
        raise NotImplementedError("get_title must be implemented.")

    def get_content(self) -> str:
        """Content to index"""
        raise NotImplementedError("get_content must be implemented.")

    def get_keywords(self) -> str:
        """Keywords to index the item with.

        A single string holding the keywords separated by a space"""
        raise NotImplementedError("get_keywords must be implemented.")

    def get_wordcount(self) -> int:
        """Number of words in the content"""
        raise NotImplementedError("get_wordcount must be implemented.")

    def get_geoposition(self) -> Optional[Tuple[float, float]]:
        """GeoPosition to index the item with.

        Either a (latitude, longitude) tuple or None"""
        return None
514+
442515

443516
class BaseWritingItem:
444517
"""Item stub to override
@@ -529,6 +602,7 @@ writer_public_objects = [
529602
ContentProvider,
530603
FileProvider,
531604
StringProvider,
605+
IndexData,
532606
pascalize
533607
]
534608
writer = create_module(writer_module_name, writer_module_doc, writer_public_objects)

libzim/zim.pxd

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ cdef extern from "zim/writer/item.h" namespace "zim::writer":
4545
COMPRESS
4646
FRONT_ARTICLE
4747

48+
# Opaque declaration: no member of IndexData is exposed to Cython here.
cdef cppclass IndexData:
    pass
50+
51+
# GeoPosition is looked up in the zim::writer::IndexData scope.
cdef extern from "zim/writer/item.h" namespace "zim::writer::IndexData":
    cdef cppclass GeoPosition:
        GeoPosition()
        GeoPosition(bool, double, double)
55+
4856
cdef extern from "zim/writer/contentProvider.h" namespace "zim::writer":
4957
cdef cppclass ContentProvider:
5058
pass
@@ -91,6 +99,8 @@ cdef extern from "libwrapper.h":
9199
ContentProviderWrapper(PyObject* obj) except +
92100
cdef cppclass WriterItemWrapper:
93101
WriterItemWrapper(PyObject* obj) except +
102+
# C++ wrapper built around a Python object; declared as deriving from IndexData.
cdef cppclass IndexDataWrapper(IndexData):
    IndexDataWrapper(PyObject* obj) except +
94104

95105
Compression comp_from_int(int)
96106

tests/test_libzim_creator.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
Creator,
2121
FileProvider,
2222
Hint,
23+
IndexData,
2324
Item,
2425
StringProvider,
2526
)
@@ -639,6 +640,54 @@ def test_hints_values(fpath):
639640
)
640641

641642

643+
@pytest.mark.parametrize(
    "indexData, customContent, search_expected",
    [
        (None, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (False, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (True, "home", [("standard", 1), ("home", 1), ("computer", 0)]),
        (True, "computer", [("standard", 1), ("home", 0), ("computer", 1)]),
        (True, "standard", [("standard", 2), ("home", 0), ("computer", 0)]),
    ],
)
def test_custom_indexdata(
    fpath, lipsum_item, lipsum, indexData, customContent, search_expected
):
    """Searches must match the custom IndexData content of an item.

    `indexData` is None when get_indexdata() returns None; otherwise it is
    the value returned by has_indexdata() of the custom IndexData."""

    class CustomIndexData(IndexData):
        def has_indexdata(self):
            return indexData

        def get_title(self):
            return ""

        def get_content(self):
            return customContent

        def get_keywords(self):
            return ""

        def get_wordcount(self):
            return 1

    custom_item = StaticItem(
        path=HOME_PATH + "custom", content=lipsum, mimetype="text/html"
    )
    # The class itself is the callable: calling it yields a fresh instance.
    custom_item.get_indexdata = (
        (lambda: None) if indexData is None else CustomIndexData
    )

    with Creator(fpath).config_indexing(True, "eng") as creator:
        creator.add_item(lipsum_item)
        creator.add_item(custom_item)

    archive = Archive(fpath)
    searcher = Searcher(archive)
    for term, match_count in search_expected:
        results = searcher.search(Query().set_query(term))
        assert results.getEstimatedMatches() == match_count
689+
690+
642691
def test_reimpfeed(fpath):
643692
class AContentProvider:
644693
def __init__(self):

0 commit comments

Comments
 (0)