Skip to content

Commit a4f1376

Browse files
committed
[tools/RDFWriter] Add custom RDF Subclassing
Closes #395
1 parent 030888d commit a4f1376

1 file changed

Lines changed: 49 additions & 11 deletions

File tree

odml/tools/rdf_converter.py

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import os
77
import uuid
8+
import warnings
89

910
from io import StringIO
1011
from rdflib import Graph, Literal, URIRef
@@ -57,19 +58,32 @@ class RDFWriter(object):
5758
"""
5859
A writer to parse odML files into RDF documents.
5960
60-
Use the 'rdf_subclassing' flag to disable default usage
61-
of Section type conversion to RDF Subclasses.
61+
Use the 'rdf_subclassing' flag to disable default usage of Section type conversion to
62+
RDF Subclasses.
63+
Provide a custom Section type to RDF Subclass Name mapping dictionary via the
64+
'custom_subclasses' attribute to add custom or overwrite default RDF Subclass mappings.
6265
6366
Usage:
6467
RDFWriter(odml_docs).get_rdf_str('turtle')
6568
RDFWriter(odml_docs).write_file("/output_path", "rdf_format")
69+
70+
RDFWriter(odml_docs, rdf_subclassing=False).write_file("path", "rdf_format")
71+
RDFWriter(odml_docs, custom_subclasses=custom_dict).write_file("path", "rdf_format")
6672
"""
6773

68-
def __init__(self, odml_documents, rdf_subclassing=True):
74+
def __init__(self, odml_documents, rdf_subclassing=True, custom_subclasses=None):
6975
"""
7076
:param odml_documents: list of odML documents
7177
:param rdf_subclassing: Flag whether Section types should be converted to RDF Subclasses
7278
for enhanced SPARQL queries. Default is 'True'.
79+
:param custom_subclasses: A dict where the keys reference a Section type and the
80+
corresponding values reference an RDF Class Name. When exporting
81+
a Section of a type contained in this dict, the resulting RDF
82+
Instance will be of the corresponding Class and this Class will
83+
be added as a Subclass of RDF Class "odml:Section" to the
84+
RDF document.
85+
Key:value pairs of the "custom_subclasses" dict will overwrite
86+
existing key:value pairs of the default subclassing dict.
7387
"""
7488
if not isinstance(odml_documents, list):
7589
odml_documents = [odml_documents]
@@ -79,9 +93,14 @@ def __init__(self, odml_documents, rdf_subclassing=True):
7993
self.graph = Graph()
8094
self.graph.bind("odml", ODML_NS)
8195

82-
self.section_subclasses = load_rdf_subclasses()
8396
self.rdf_subclassing = rdf_subclassing
8497

98+
self.section_subclasses = load_rdf_subclasses()
99+
# If a custom Section type to RDF Subclass dict has been provided,
100+
# parse it and update the default section_subclasses dict with the content.
101+
if custom_subclasses and isinstance(custom_subclasses, dict):
102+
self._parse_custom_subclasses(custom_subclasses)
103+
85104
def convert_to_rdf(self):
86105
"""
87106
convert_to_rdf converts all odML documents to RDF,
@@ -228,25 +247,18 @@ def save_section(self, sec, curr_node):
228247
# Add type of current node to the RDF graph
229248
curr_type = fmt.rdf_type
230249

231-
print(curr_type)
232-
233250
# Handle section subclass types
234251
if self.rdf_subclassing:
235-
print("I'm in here")
236252
sub_sec = self._get_section_subclass(sec)
237253
if sub_sec:
238254
curr_type = sub_sec
239255

240-
print(curr_type)
241-
242256
self.graph.add((curr_node, RDF.type, URIRef(curr_type)))
243257

244258
for k in fmt.rdf_map_keys:
245259
curr_pred = fmt.rdf_map(k)
246260
curr_val = getattr(sec, k)
247261

248-
print("pred: %s; val: %s" % (curr_pred, curr_val))
249-
250262
# Ignore an "id" entry, it has already been used to create the node itself.
251263
if k == "id" or not curr_val:
252264
continue
@@ -310,6 +322,32 @@ class Section.
310322

311323
return None
312324

325+
def _parse_custom_subclasses(self, custom_subclasses):
326+
"""
327+
Parses a provided dictionary of "Section type": "RDF Subclass name"
328+
key value pairs and adds the pairs to the parsers' 'section_subclasses'
329+
default dictionary. Existing key:value pairs will be overwritten
330+
with provided custom key:value pairs and a Warning will be issued.
331+
Dictionary values containing whitespaces will raise a ValueError.
332+
333+
:param custom_subclasses: dictionary of "Section type": "RDF Subclass name" key value pairs.
334+
Values must not contain whitespaces, a ValueError will be raised
335+
otherwise.
336+
"""
337+
338+
# Do not allow whitespaces in values
339+
if " " in "".join(custom_subclasses.values()):
340+
msg = "Custom RDF Subclass names must not contain any whitespaces."
341+
raise ValueError(msg)
342+
343+
for k in custom_subclasses:
344+
val = custom_subclasses[k]
345+
if k in self.section_subclasses:
346+
msg = "RDFWriter custom subclasses: Key '%s' already exists. " % k
347+
msg += "Value '%s' replaces default value '%s'." % (val, self.section_subclasses[k])
348+
warnings.warn(msg, stacklevel=2)
349+
self.section_subclasses[k] = val
350+
313351
def __str__(self):
314352
return self.convert_to_rdf().serialize(format='turtle').decode("utf-8")
315353

0 commit comments

Comments
 (0)