Skip to content

Commit e8704ab

Browse files
Add unit and integration tests for parse_distribution_str
1 parent 3b3c28c commit e8704ab

1 file changed

Lines changed: 275 additions & 0 deletions

File tree

tests/test_parse_distribution.py

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
"""Tests for parse_distribution_str function in cli.py"""
2+
3+
from unittest.mock import patch
4+
5+
import pytest
6+
7+
from databusclient.cli import parse_distribution_str
8+
from databusclient.api.deploy import (
9+
create_dataset,
10+
_get_file_info_from_dict,
11+
)
12+
13+
14+
class TestParseDistributionStr:
    """Unit tests pinning the dict returned by parse_distribution_str.

    Every test feeds a ``url|modifier|...`` string to the parser and checks
    one or more of the ``url``, ``variants``, ``formatExtension`` and
    ``compression`` keys of the result.
    """

    # -------------------------------------------------------------------------
    # URL Extraction Tests
    # -------------------------------------------------------------------------

    def test_basic_url_extraction(self):
        """A bare URL comes back unchanged under the ``url`` key."""
        parsed = parse_distribution_str("http://example.com/data.json")
        expected_url = "http://example.com/data.json"
        assert parsed["url"] == expected_url

    def test_url_with_modifiers(self):
        """Trailing modifiers are not part of the extracted URL."""
        parsed = parse_distribution_str("http://example.com/data.json|lang=en|.json")
        expected_url = "http://example.com/data.json"
        assert parsed["url"] == expected_url

    # -------------------------------------------------------------------------
    # Content Variant Parsing Tests
    # -------------------------------------------------------------------------

    def test_single_variant(self):
        """One ``key=value`` modifier yields a one-entry variants dict."""
        parsed = parse_distribution_str("http://example.com/file|lang=en")
        expected_variants = {"lang": "en"}
        assert parsed["variants"] == expected_variants

    def test_multiple_variants(self):
        """Several ``key=value`` modifiers are all collected."""
        parsed = parse_distribution_str("http://example.com/file|lang=en|type=full|quality=high")
        expected_variants = {
            "lang": "en",
            "type": "full",
            "quality": "high",
        }
        assert parsed["variants"] == expected_variants

    def test_variant_with_equals_in_value(self):
        """Only the first ``=`` separates key from value."""
        parsed = parse_distribution_str("http://example.com/file|filter=a=b")
        expected_variants = {"filter": "a=b"}
        assert parsed["variants"] == expected_variants

    def test_empty_variants(self):
        """Without any ``key=value`` modifier the variants dict is empty."""
        parsed = parse_distribution_str("http://example.com/file.json|.json")
        assert parsed["variants"] == {}

    # -------------------------------------------------------------------------
    # Format Extension Tests
    # -------------------------------------------------------------------------

    def test_json_extension(self):
        """``.json`` is reported as format extension ``json``."""
        parsed = parse_distribution_str("http://example.com/file|.json")
        assert parsed["formatExtension"] == "json"

    def test_ttl_extension(self):
        """``.ttl`` is reported as format extension ``ttl``."""
        parsed = parse_distribution_str("http://example.com/file|.ttl")
        assert parsed["formatExtension"] == "ttl"

    def test_csv_extension(self):
        """``.csv`` is reported as format extension ``csv``."""
        parsed = parse_distribution_str("http://example.com/file|.csv")
        assert parsed["formatExtension"] == "csv"

    def test_xml_extension(self):
        """``.xml`` is reported as format extension ``xml``."""
        parsed = parse_distribution_str("http://example.com/file|.xml")
        assert parsed["formatExtension"] == "xml"

    def test_no_extension(self):
        """``formatExtension`` is None when no extension modifier is given."""
        parsed = parse_distribution_str("http://example.com/file|lang=en")
        assert parsed["formatExtension"] is None

    # -------------------------------------------------------------------------
    # Compression Detection Tests
    # -------------------------------------------------------------------------

    def test_gz_compression(self):
        """``.gz`` is reported as compression ``gz``."""
        parsed = parse_distribution_str("http://example.com/file|.gz")
        assert parsed["compression"] == "gz"

    def test_zip_compression(self):
        """``.zip`` is reported as compression ``zip``."""
        parsed = parse_distribution_str("http://example.com/file|.zip")
        assert parsed["compression"] == "zip"

    def test_br_compression(self):
        """``.br`` (brotli) is reported as compression ``br``."""
        parsed = parse_distribution_str("http://example.com/file|.br")
        assert parsed["compression"] == "br"

    def test_tar_compression(self):
        """``.tar`` is reported as compression ``tar``."""
        parsed = parse_distribution_str("http://example.com/file|.tar")
        assert parsed["compression"] == "tar"

    def test_zst_compression(self):
        """``.zst`` (zstandard) is reported as compression ``zst``."""
        parsed = parse_distribution_str("http://example.com/file|.zst")
        assert parsed["compression"] == "zst"

    def test_no_compression(self):
        """``compression`` is None when only a format extension is given."""
        parsed = parse_distribution_str("http://example.com/file|.json")
        assert parsed["compression"] is None

    # -------------------------------------------------------------------------
    # Combined Modifiers Tests
    # -------------------------------------------------------------------------

    def test_full_distribution_string(self):
        """Variants, extension and compression together give the full dict."""
        expected = {
            "url": "http://mysite.com/data.json",
            "variants": {"lang": "fr", "quality": "high"},
            "formatExtension": "json",
            "compression": "gz",
        }
        parsed = parse_distribution_str(
            "http://mysite.com/data.json|lang=fr|quality=high|.json|.gz"
        )
        assert parsed == expected

    def test_order_independence(self):
        """Shuffled modifiers are still sorted into the right keys."""
        parsed = parse_distribution_str(
            "http://example.com/file|.gz|lang=en|.json|type=full"
        )
        assert parsed["variants"] == {"lang": "en", "type": "full"}
        assert parsed["formatExtension"] == "json"
        assert parsed["compression"] == "gz"

    # -------------------------------------------------------------------------
    # Edge Cases
    # -------------------------------------------------------------------------

    def test_whitespace_handling(self):
        """Whitespace around separators, keys and values is stripped."""
        parsed = parse_distribution_str("http://example.com/file | lang = en | .json ")
        assert parsed["url"] == "http://example.com/file"
        assert parsed["variants"] == {"lang": "en"}
        assert parsed["formatExtension"] == "json"

    def test_standalone_tag_warning(self, capsys):
        """A modifier without ``=`` or leading dot is warned about on stdout."""
        parsed = parse_distribution_str("http://example.com/file|unknown_tag")
        stdout = capsys.readouterr().out
        assert "WARNING" in stdout
        assert "unknown_tag" in stdout
        # The unrecognised tag must not leak into the variants mapping.
        assert "unknown_tag" not in parsed["variants"]

    def test_url_only(self):
        """A URL with no modifiers yields empty variants and None for the rest."""
        expected = {
            "url": "http://example.com/data.json",
            "variants": {},
            "formatExtension": None,
            "compression": None,
        }
        parsed = parse_distribution_str("http://example.com/data.json")
        assert parsed == expected
176+
177+
178+
class TestIntegrationWithDeployAPI:
    """Integration tests verifying parsed dicts work with api_deploy functions.

    Each test patches ``databusclient.api.deploy._load_file_stats`` with a
    canned ``(checksum, size)`` tuple and then feeds the output of
    ``parse_distribution_str`` into the deploy helpers.
    """

    @staticmethod
    def _find_version_graph(dataset):
        """Return the first ``@graph`` entry whose ``@type`` contains "Version".

        Returns None when no entry matches, so callers can assert on the
        result and fail with a clear message instead of a TypeError.
        Entries without an ``@type`` key are skipped via the ``[]`` default.
        """
        return next(
            (node for node in dataset["@graph"] if "Version" in node.get("@type", [])),
            None,
        )

    @patch("databusclient.api.deploy._load_file_stats")
    def test_get_file_info_from_dict_basic(self, mock_load_stats):
        """Test _get_file_info_from_dict with parsed distribution dict."""
        mock_load_stats.return_value = ("abc123" * 10 + "abcd", 12345)

        parsed = parse_distribution_str(
            "http://example.com/data.json|lang=en|type=full|.json|.gz"
        )
        cvs, ext, comp, sha, size = _get_file_info_from_dict(parsed)

        assert cvs == {"lang": "en", "type": "full"}
        assert ext == "json"
        assert comp == "gz"
        assert sha == "abc123" * 10 + "abcd"
        assert size == 12345

    @patch("databusclient.api.deploy._load_file_stats")
    def test_get_file_info_from_dict_defaults(self, mock_load_stats):
        """Test default values when extension/compression not specified."""
        mock_load_stats.return_value = ("sha256hash", 1000)

        parsed = parse_distribution_str("http://example.com/data|lang=en")
        # Only the extension and compression defaults are under test here;
        # the remaining tuple members are deliberately ignored.
        _cvs, ext, comp, _sha, _size = _get_file_info_from_dict(parsed)

        # Should use defaults
        assert ext == "file"  # default when not specified
        assert comp == "none"  # default when not specified

    @patch("databusclient.api.deploy._load_file_stats")
    def test_create_dataset_with_dict_distributions(self, mock_load_stats):
        """Test create_dataset accepts parsed dict distributions."""
        fake_sha = "a" * 64
        mock_load_stats.return_value = (fake_sha, 5000)

        parsed_dist = parse_distribution_str(
            "http://example.com/file.json|lang=en|.json"
        )

        dataset = create_dataset(
            version_id="https://databus.example.org/user/group/artifact/2024.01.01/",
            title="Test Dataset",
            abstract="Test abstract",
            description="Test description",
            license_url="https://example.org/license",
            distributions=[parsed_dist],
        )

        # Verify dataset structure
        assert "@context" in dataset
        assert "@graph" in dataset

        # Find distribution in graph
        version_graph = self._find_version_graph(dataset)
        assert version_graph is not None
        assert "distribution" in version_graph

        dist = version_graph["distribution"][0]
        assert dist["downloadURL"] == "http://example.com/file.json"
        assert dist["formatExtension"] == "json"
        assert dist["dcv:lang"] == "en"

    @patch("databusclient.api.deploy._load_file_stats")
    def test_create_dataset_multiple_distributions(self, mock_load_stats):
        """Test create_dataset with two distributions that differ only in lang."""
        fake_sha = "b" * 64
        mock_load_stats.return_value = (fake_sha, 3000)

        dist1 = parse_distribution_str("http://example.com/en.json|lang=en|.json")
        dist2 = parse_distribution_str("http://example.com/de.json|lang=de|.json")

        dataset = create_dataset(
            version_id="https://databus.example.org/user/group/artifact/2024.01.01/",
            title="Test Dataset",
            abstract="Test abstract",
            description="Test description",
            license_url="https://example.org/license",
            distributions=[dist1, dist2],
        )

        # Guard the lookup so a missing Version node is reported as an
        # assertion failure rather than a TypeError on subscripting None.
        version_graph = self._find_version_graph(dataset)
        assert version_graph is not None
        assert "distribution" in version_graph

        # Both distributions should be present
        distributions = version_graph["distribution"]
        assert len(distributions) == 2

        # Verify different language variants
        langs = {d["dcv:lang"] for d in distributions}
        assert langs == {"en", "de"}

0 commit comments

Comments
 (0)