-
Notifications
You must be signed in to change notification settings - Fork 16
feat Nextcloudclient #29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
3f73738
9edc0dc
a56f01d
5fdf78b
800256c
66f1c8e
4259229
b179f90
a504b9d
0ce0c24
6596cbc
b9f9854
2f8493d
07359cc
8047968
0172450
f957512
607f527
6cb7e11
7651c31
df17a7c
7492531
c985603
62a3611
22ac02f
3faaf4d
5dfebe5
f9367c0
5d474db
bef78ef
529f2ae
77dca5a
02b1873
04c0b6e
fb93bc9
8e6167b
943e30b
1274cbc
02481b3
a5ec24d
f95155f
274f252
f22c71d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| import os | ||
| import sys | ||
| import argparse | ||
| import json | ||
|
|
||
| from databusclient import create_distribution, create_dataset, deploy | ||
| from dotenv import load_dotenv | ||
|
|
||
| from nextcloudclient.upload import upload_to_nextcloud | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Import path breaks when running from databusclient/; add a fallback. README shows [remainder of this sentence was lost in extraction]. Suggested change:

-from nextcloudclient.upload import upload_to_nextcloud
+try:
+    from nextcloudclient.upload import upload_to_nextcloud
+except ModuleNotFoundError:
+    try:
+        from databusclient.upload import upload_to_nextcloud
+    except ModuleNotFoundError as e:
+        raise ModuleNotFoundError(
+            "upload_to_nextcloud not found. Install/provide 'nextcloudclient' "
+            "or place 'upload.py' under 'databusclient' (importable as databusclient.upload)."
+        ) from e

🤖 Prompt for AI Agents |
||
def deploy_to_databus(
    metadata: list[tuple[str, str, int, str]],
    version_id: str,
    title: str,
    abstract: str,
    description: str,
    license_url: str,
) -> None:
    """Create a dataset from file metadata and deploy it to the Databus.

    One distribution is created per metadata entry; all distributions are
    bundled into a single dataset, which is then deployed using the
    ``API_KEY`` environment variable (loaded through ``load_dotenv()``).

    Args:
        metadata: Flat list of ``(filename, checksum, size, url)`` entries.
            Each entry must unpack into exactly four values, so the
            four-element lists produced by ``json.load`` work as well.
            The list is iterated twice and must not be a one-shot iterator.
        version_id: Passed to ``create_dataset`` as ``version_id``.
        title: Passed to ``create_dataset`` as ``title``.
        abstract: Passed to ``create_dataset`` as ``abstract``.
        description: Passed to ``create_dataset`` as ``description``.
        license_url: Passed to ``create_dataset`` as ``license_url``.

    Raises:
        ValueError: If ``API_KEY`` is not set, or if a checksum is not a
            64-character hexadecimal string.
    """
    load_dotenv()
    api_key = os.getenv("API_KEY")
    if not api_key:
        raise ValueError("API_KEY not found in .env")

    hex_digits = set("0123456789abcdefABCDEF")

    distributions = []
    for counter, (filename, checksum, size, url) in enumerate(metadata):
        # Expect a SHA-256 hex digest: exactly 64 characters, all of them
        # hexadecimal. Checking the length alone would let arbitrary
        # 64-character strings through.
        if (
            not isinstance(checksum, str)
            or len(checksum) != 64
            or not hex_digits.issuperset(checksum)
        ):
            raise ValueError(f"Invalid checksum for {filename}: expected SHA-256 hex (64 chars), got '{checksum}'")

        # Derive format and compression from the dot-separated name parts.
        # NOTE: with three or more parts the last two are always taken as
        # (format, compression), e.g. "a.b.ttl" -> format "b", compression "ttl".
        parts = filename.split(".")
        if len(parts) == 1:
            file_format = "none"
            compression = "none"
        elif len(parts) == 2:
            file_format = parts[-1]
            compression = "none"
        else:
            file_format = parts[-2]
            compression = parts[-1]

        distributions.append(
            create_distribution(
                url=url,
                # The running index keeps each distribution's content variant distinct.
                cvs={"count": f"{counter}"},
                file_format=file_format,
                compression=compression,
                sha256_length_tuple=(checksum, size),
            )
        )

    dataset = create_dataset(
        version_id=version_id,
        title=title,
        abstract=abstract,
        description=description,
        license_url=license_url,
        distributions=distributions,
    )

    deploy(dataset, api_key)

    # The last element of every entry is its download URL.
    metadata_string = ",\n".join(entry[-1] for entry in metadata)

    print(f"Successfully deployed\n{metadata_string}\nto databus {version_id}")
|
|
||
|
|
||
def parse_args():
    """Parse the command line for the upload-and-deploy script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``. The five
        dataset options (``--version-id``, ``--title``, ``--abstract``,
        ``--description``, ``--license``) are mandatory; the upload-related
        options are optional at the parser level.
    """
    cli = argparse.ArgumentParser(description="Upload files to Nextcloud and deploy to DBpedia Databus.")
    cli.add_argument("files", nargs="*", help="Path(s) to file(s) or folder(s) to upload")

    # Options that describe where and how to upload.
    upload_options = (
        ("--webdav-url", "WebDAV URL (e.g., https://cloud.example.com/remote.php/webdav)"),
        ("--remote", "rclone remote name (e.g., 'nextcloud')"),
        ("--path", "Remote path on Nextcloud (e.g., 'datasets/mydataset')"),
    )
    for flag, help_text in upload_options:
        cli.add_argument(flag, help=help_text)

    cli.add_argument("--no-upload", action="store_true", help="Skip file upload and use existing metadata")
    cli.add_argument("--metadata", help="Path to metadata JSON file (required if --no-upload is used)")

    # Options that describe the dataset; all of them are required.
    dataset_options = (
        ("--version-id", "Databus version URI"),
        ("--title", "Title of the dataset"),
        ("--abstract", "Short abstract of the dataset"),
        ("--description", "Detailed description of the dataset"),
        ("--license", "License URL (e.g., https://dalicc.net/licenselibrary/Apache-2.0)"),
    )
    for flag, help_text in dataset_options:
        cli.add_argument(flag, required=True, help=help_text)

    return cli.parse_args()
|
|
||
|
|
||
if __name__ == '__main__':
    # Script entry point: obtain file metadata (either from an existing JSON
    # file or by uploading the given files), then deploy it to the Databus.
    args = parse_args()

    if args.no_upload:
        # sys.exit(message) writes the message to stderr and exits with
        # status 1, keeping error output out of stdout.
        if not args.metadata:
            sys.exit("Error: --metadata is required when using --no-upload")
        if not os.path.isfile(args.metadata):
            sys.exit(f"Error: Metadata file not found: {args.metadata}")
        # Read as UTF-8 explicitly instead of the platform default encoding.
        with open(args.metadata, 'r', encoding="utf-8") as f:
            metadata = json.load(f)
    else:
        if not (args.webdav_url and args.remote and args.path):
            sys.exit("Error: --webdav-url, --remote, and --path are required unless --no-upload is used")
        metadata = upload_to_nextcloud(args.files, args.remote, args.path, args.webdav_url)

    # upload_to_nextcloud reports failures by printing and returns only the
    # entries that were uploaded; an empty result means nothing can be deployed.
    if not metadata:
        sys.exit("Error: no file metadata available; nothing to deploy")

    deploy_to_databus(
        metadata,
        version_id=args.version_id,
        title=args.title,
        abstract=args.abstract,
        description=args.description,
        license_url=args.license,
    )
|
gg46ixav marked this conversation as resolved.
Outdated
gg46ixav marked this conversation as resolved.
Outdated
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| [ | ||
| [ | ||
| "example.ttl", | ||
| "6e340b9cffb37a989ca544e6bb780a2c7e5d7dcb", | ||
| 12345, | ||
| "https://cloud.example.com/remote.php/webdav/datasets/mydataset/example.ttl" | ||
| ], | ||
| [ | ||
| "example.csv.gz", | ||
| "3f786850e387550fdab836ed7e6dc881de23001b", | ||
| 54321, | ||
| "https://cloud.example.com/remote.php/webdav/datasets/mydataset/example.csv.gz" | ||
| ] | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| import hashlib | ||
| import os | ||
| import subprocess | ||
| import posixpath | ||
| from urllib.parse import urljoin, quote | ||
|
|
||
|
|
||
def compute_sha256_and_length(filepath):
    """Hash a file with SHA-256 and count its bytes in a single pass.

    Args:
        filepath: Path of the file to read (opened in binary mode).

    Returns:
        A ``(hex_digest, byte_count)`` tuple.
    """
    digest = hashlib.sha256()
    byte_count = 0
    with open(filepath, 'rb') as stream:
        # read() returns b"" at end of file, which stops the iterator.
        for block in iter(lambda: stream.read(4096), b""):
            digest.update(block)
            byte_count += len(block)
    return digest.hexdigest(), byte_count
|
|
||
def get_all_files(path):
    """List the files found at *path*.

    Args:
        path: A file or a directory.

    Returns:
        ``[path]`` when *path* is a regular file; otherwise every file found
        by walking *path* recursively, each joined with its containing
        directory. The list is empty when the walk yields nothing.
    """
    if os.path.isfile(path):
        return [path]
    return [
        os.path.join(folder, entry)
        for folder, _, entries in os.walk(path)
        for entry in entries
    ]
|
|
||
def upload_to_nextcloud(
    source_paths: list[str],
    remote_name: str,
    remote_path: str,
    webdav_url: str,
) -> list[tuple[str, str, int, str]]:
    """Upload files or folders with rclone and return metadata for each file.

    For every source path, the SHA-256 checksum, size and expected WebDAV URL
    of each contained file are computed first; the path is then copied with
    ``rclone`` (``copy`` for directories, ``copyto`` for single files).
    Metadata is only added to the result when the rclone call succeeds.

    Args:
        source_paths: Local files and/or directories to upload. Paths that do
            not exist are reported and skipped.
        remote_name: Name of the rclone remote; used as the ``<remote>:``
            prefix of the destination.
        remote_path: Target folder on the remote.
        webdav_url: Base WebDAV URL used to build each file's URL.

    Returns:
        A flat list of ``(filename, checksum, size, url)`` tuples for the
        files of all successfully uploaded paths.
    """
    result = []
    # A trailing slash makes urljoin append to the base path rather than
    # replace its last segment.
    base_url = webdav_url.rstrip("/") + "/"

    for path in source_paths:
        if not os.path.exists(path):
            print(f"Path not found: {path}")
            continue

        abs_path = os.path.abspath(path)
        basename = os.path.basename(abs_path)
        # Decide once per source path; the answer is the same for all its files.
        is_dir = os.path.isdir(path)

        tmp_results = []
        for file in get_all_files(abs_path):
            checksum, size = compute_sha256_and_length(file)

            if is_dir:
                # Normalize to POSIX separators for WebDAV/URLs.
                rel_file = os.path.relpath(file, abs_path).replace(os.sep, "/")
                remote_webdav_path = posixpath.join(remote_path, basename, rel_file)
            else:
                remote_webdav_path = posixpath.join(remote_path, os.path.basename(file))

            # Preserve scheme/host and percent-encode path segments.
            url = urljoin(base_url, quote(remote_webdav_path.lstrip("/"), safe="/"))
            tmp_results.append((os.path.basename(file), checksum, size, url))

        dest_subpath = posixpath.join(remote_path.lstrip("/"), basename)
        destination = f"{remote_name}:{dest_subpath}"
        # Directories use "copy"; single files use "copyto" so the
        # destination names the file itself.
        rclone_mode = "copy" if is_dir else "copyto"
        command = ["rclone", rclone_mode, abs_path, destination, "--progress"]

        print(f"Upload: {path} → {destination}")
        try:
            subprocess.run(command, check=True)
        except subprocess.CalledProcessError as e:
            print(f"❌ Error uploading {path}: {e}\n")
        except FileNotFoundError:
            print("❌ rclone not found on PATH. Install rclone and retry.")
            # Without rclone no later path can succeed either; stop here
            # instead of hashing the remaining files for nothing.
            break
        else:
            result.extend(tmp_results)
            print("✅ Uploaded successfully.\n")

    return result
|
coderabbitai[bot] marked this conversation as resolved.
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.