Skip to content

Commit d1eedaa

Browse files
committed
Moved to new repository, initial commit
0 parents  commit d1eedaa

14 files changed

Lines changed: 1280 additions & 0 deletions

.gitignore

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/#use-with-ide
110+
.pdm.toml
111+
112+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113+
__pypackages__/
114+
115+
# Celery stuff
116+
celerybeat-schedule
117+
celerybeat.pid
118+
119+
# SageMath parsed files
120+
*.sage.py
121+
122+
# Environments
123+
.env
124+
.venv
125+
env/
126+
venv/
127+
ENV/
128+
env.bak/
129+
venv.bak/
130+
131+
# Spyder project settings
132+
.spyderproject
133+
.spyproject
134+
135+
# Rope project settings
136+
.ropeproject
137+
138+
# mkdocs documentation
139+
/site
140+
141+
# mypy
142+
.mypy_cache/
143+
.dmypy.json
144+
dmypy.json
145+
146+
# Pyre type checker
147+
.pyre/
148+
149+
# pytype static type analyzer
150+
.pytype/
151+
152+
# Cython debug symbols
153+
cython_debug/
154+
155+
# PyCharm
156+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158+
# and can be added to the global gitignore or merged into this file. For a more nuclear
159+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160+
#.idea/
161+

BACKLOG.md

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# BACKLOG
2+
3+
* gen preview for files
4+
* add shell completion script
5+
6+
7+
## Current Problems
8+
9+
### Redeploy
10+
11+
Currently there are problems handling multiple `key,value` pairs of content variants; it only works by excluding the multiple ones in the query.
12+
13+
```
14+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
15+
PREFIX databus: <https://databus.dbpedia.org/>
16+
PREFIX dataid: <http://dataid.dbpedia.org/ns/core#>
17+
PREFIX dataid-cv: <http://dataid.dbpedia.org/ns/cv#>
18+
PREFIX dct: <http://purl.org/dc/terms/>
19+
PREFIX dcat: <http://www.w3.org/ns/dcat#>
20+
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
21+
22+
SELECT DISTINCT ?group ?art ?version ?title ?publisher ?comment ?description ?license ?file ?extension ?type ?bytes ?shasum WHERE {
23+
?dataset dataid:account databus:ontologies .
24+
?dataset dataid:group ?group .
25+
?dataset dataid:artifact ?art.
26+
?dataset dcat:distribution ?distribution .
27+
?dataset dct:license ?license .
28+
?dataset dct:publisher ?publisher .
29+
?dataset rdfs:comment ?comment .
30+
?dataset dct:description ?description .
31+
?dataset dct:title ?title .
32+
?distribution dcat:downloadURL ?file .
33+
?distribution dataid:formatExtension ?extension .
34+
?distribution dataid-cv:type ?type .
35+
?distribution dcat:byteSize ?bytes .
36+
?distribution dataid:sha256sum ?shasum .
37+
?dataset dct:hasVersion ?version .
38+
# Excludes dev versions
39+
FILTER (!regex(?art, "--DEV"))
40+
# exclude some stuff since content variants are hard
41+
MINUS { ?distribution dataid:contentVariant 'sorted'^^xsd:string . }
42+
MINUS { ?distribution dataid:contentVariant 'NONE'^^xsd:string}
43+
MINUS { ?distribution dataid:contentVariant 'goodLicense'^^xsd:string}
44+
MINUS { ?distribution dataid:contentVariant 'lodeMetadata'^^xsd:string}
45+
MINUS { ?distribution dataid:contentVariant 'old'^^xsd:string}
46+
} ORDER BY ?version
47+
```
48+
49+
## CLI Problems
50+
51+
Currently there is no syntax for submitting format extensions, content variants, etc. to the CLI for Databus versions. A syntax needs to be defined, for example:
52+
```
53+
python3 -m databusclient [...] "http://akswnc7.informatik.uni-leipzig.de/dstreitmatter/archivo/advene.org/ns--cinelab--ld/2020.06.10-175249/ns--cinelab--ld_type=generatedDocu.html|key1=value1|value2|.format|..compression"
54+
```
55+
Something like this, idk.

Makefile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
all: install
2+
3+
install:
4+
poetry install
5+
6+
clean-dist:
7+
rm -rf dist/
8+
9+
build-for-pypi:
10+
poetry build
11+
12+
deploy: clean-dist build-for-pypi
13+
poetry publish --username __token__ --password ${PYPI_TOKEN}

README.md

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# Databus Client Python
2+
3+
## Install
4+
```bash
5+
python3 -m pip install databusclient
6+
```
7+
8+
## CLI Usage
9+
```bash
10+
databusclient --help
11+
```
12+
13+
```man
14+
Usage: databusclient [OPTIONS] COMMAND [ARGS]...
15+
16+
Options:
17+
--install-completion [bash|zsh|fish|powershell|pwsh]
18+
Install completion for the specified shell.
19+
--show-completion [bash|zsh|fish|powershell|pwsh]
20+
Show completion for the specified shell, to
21+
copy it or customize the installation.
22+
--help Show this message and exit.
23+
24+
Commands:
25+
deploy
26+
downoad
27+
```
28+
### Deploy command
29+
```
30+
databusclient deploy --help
31+
```
32+
```
33+
34+
35+
Usage: databusclient deploy [OPTIONS] DISTRIBUTIONS...
36+
37+
Arguments:
38+
DISTRIBUTIONS... distributions in the form of List[URL|CV|fileext|compression|sha256sum:contentlength] where URL is the
39+
download URL and CV the key=value pairs (_ separated)
40+
content variants of a distribution, fileExt and Compression can be set, if not they are inferred from the path [required]
41+
42+
Options:
43+
--versionid TEXT target databus version/dataset identifier of the form <h
44+
ttps://databus.dbpedia.org/$ACCOUNT/$GROUP/$ARTIFACT/$VE
45+
RSION> [required]
46+
--title TEXT dataset title [required]
47+
--abstract TEXT dataset abstract max 200 chars [required]
48+
--description TEXT dataset description [required]
49+
--license TEXT license (see dalicc.net) [required]
50+
--apikey TEXT apikey [required]
51+
--help Show this message and exit.
52+
```
53+
Examples of using deploy command
54+
```
55+
databusclient deploy --versionid https://databus.dbpedia.org/user1/group1/artifact1/2022-05-18 --title title1 --abstract abstract1 --description description1 --license http://dalicc.net/licenselibrary/AdaptivePublicLicense10 --apikey MYSTERIOUS 'https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml|type=swagger'
56+
```
57+
58+
```
59+
databusclient deploy --versionid https://dev.databus.dbpedia.org/denis/group1/artifact1/2022-05-18 --title "Client Testing" --abstract "Testing the client...." --description "Testing the client...." --license http://dalicc.net/licenselibrary/AdaptivePublicLicense10 --apikey MYSTERIOUS 'https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml|type=swagger'
60+
```
61+
62+
A few more notes for CLI usage:
63+
64+
* The content variants can be left out ONLY IF there is just one distribution
65+
* To have everything inferred, just pass the URL on its own, e.g. `https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml`
66+
* If later parameters are given, any earlier parameter you do not want to set must still be present but left empty, like `https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml||yml|7a751b6dd5eb8d73d97793c3c564c71ab7b565fa4ba619e4a8fd05a6f80ff653:367116`
67+
68+
## Module Usage
69+
70+
### Step 1: Create lists of distributions for the dataset
71+
72+
```python
73+
from databusclient import create_distribution
74+
75+
# create a list
76+
distributions = []
77+
78+
# minimal requirements
79+
# compression and filetype will be inferred from the path
80+
# this will trigger the download of the file to evaluate the shasum and content length
81+
distributions.append(
82+
create_distribution(url="https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml", cvs={"type": "swagger"})
83+
)
84+
85+
# full parameters
86+
# will just place parameters correctly, nothing will be downloaded or inferred
87+
distributions.append(
88+
create_distribution(
89+
url="https://example.org/some/random/file.csv.bz2",
90+
cvs={"type": "example", "realfile": "false"},
91+
file_format="csv",
92+
compression="bz2",
93+
sha256_length_tuple=("7a751b6dd5eb8d73d97793c3c564c71ab7b565fa4ba619e4a8fd05a6f80ff653", 367116)
94+
)
95+
)
96+
```
97+
98+
A few notes:
99+
100+
* The dict for content variants can be empty ONLY IF there is just one distribution
101+
* There can be no compression if there is no file format
102+
103+
### Step 2: Create dataset
104+
105+
```python
106+
from databusclient import create_dataset
107+
108+
# minimal way
109+
dataset = create_dataset(
110+
version_id="https://dev.databus.dbpedia.org/denis/group1/artifact1/2022-05-18",
111+
title="Client Testing",
112+
abstract="Testing the client....",
113+
description="Testing the client....",
114+
license_url="http://dalicc.net/licenselibrary/AdaptivePublicLicense10",
115+
distributions=distributions,
116+
)
117+
118+
# with group metadata
119+
dataset = create_dataset(
120+
version_id="https://dev.databus.dbpedia.org/denis/group1/artifact1/2022-05-18",
121+
title="Client Testing",
122+
abstract="Testing the client....",
123+
description="Testing the client....",
124+
license_url="http://dalicc.net/licenselibrary/AdaptivePublicLicense10",
125+
distributions=distributions,
126+
group_title="Title of group1",
127+
group_abstract="Abstract of group1",
128+
group_description="Description of group1"
129+
)
130+
```
131+
132+
NOTE: The group metadata is only used if all group parameters are set; otherwise it is ignored.
133+
134+
### Step 3: Deploy to databus
135+
136+
```python
137+
from databusclient import deploy
138+
139+
# to deploy something you just need the dataset from the previous step and an API key
140+
# API key can be found (or generated) at https://$$DATABUS_BASE$$/$$USER$$#settings
141+
deploy(dataset, "mysterious api key")
142+
```

databusclient/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from databusclient.client import create_dataset, deploy, create_distribution

databusclient/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from databusclient import cli
2+
3+
cli.app()

0 commit comments

Comments
 (0)