Skip to content

Commit bf82143

Browse files
committed
add support for loading zip files, load as tabular in case of tsv or csv format, support read options
1 parent 1d37d4a commit bf82143

6 files changed

Lines changed: 132 additions & 35 deletions

File tree

README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,21 @@ Validate the data explicitly and get a list of errors
5656
Package::validate("tests/fixtures/simple_invalid_datapackage.json"); // array of validation errors
5757
```
5858

59+
Load a zip file
60+
61+
```php
62+
$package = Package::load('http://datahub.io/opendatafortaxjustice/eucountrydatawb/r/datapackage_zip.zip');
63+
```
64+
65+
Provide read options, which are passed through to the [tableschema-php](https://github.com/frictionlessdata/tableschema-php) `Table::read` method
66+
67+
```php
68+
$package = Package::load('http://datahub.io/opendatafortaxjustice/eucountrydatawb/r/datapackage_zip.zip');
69+
foreach ($package as $resource) {
70+
$resource->read(["cast" => false]);
71+
}
72+
```
73+
5974
The package object has some useful methods to access and manipulate the resources
6075

6176
```php
@@ -106,7 +121,6 @@ Save the entire datapackage including any local data to a zip file
106121
$package->save("datapackage.zip");
107122
```
108123

109-
110124
### Resource
111125

112126
Resource objects can be accessed from a Package as described above

src/Factory.php

Lines changed: 70 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use frictionlessdata\datapackage\Datapackages\BaseDatapackage;
66
use frictionlessdata\datapackage\Resources\BaseResource;
7+
use Alchemy\Zippy\Zippy;
78

89
/**
910
* datapackage and resource have different classes depending on the corresponding profile
@@ -23,6 +24,7 @@ class Factory
2324
* - JSON encoded object
2425
* - URL (must be in either 'http' or 'https' schemes)
2526
* - local filesystem (POSIX) path.
27+
* - local or remote zip file
2628
*
2729
* @param mixed $source
2830
* @param null|string $basePath optional, required only if you want to use relative paths
@@ -269,37 +271,45 @@ protected static function loadSource($source, $basePath)
269271
);
270272
}
271273
} elseif (static::isHttpSource($source)) {
272-
try {
273-
$descriptor = json_decode(file_get_contents(static::normalizeHttpSource($source)));
274-
} catch (\Exception $e) {
275-
throw new Exceptions\DatapackageInvalidSourceException(
276-
'Failed to load source: '.json_encode($source).': '.$e->getMessage()
277-
);
274+
if (static::isHttpZipSource($source)) {
275+
return static::loadHttpZipSource($source);
276+
} else {
277+
try {
278+
$descriptor = json_decode(file_get_contents(static::normalizeHttpSource($source)));
279+
} catch (\Exception $e) {
280+
throw new Exceptions\DatapackageInvalidSourceException(
281+
'Failed to load source: '.json_encode($source).': '.$e->getMessage()
282+
);
283+
}
284+
// http sources don't allow relative paths, hence basePath should remain null
285+
$basePath = null;
278286
}
279-
// http sources don't allow relative paths, hence basePath should remain null
280-
$basePath = null;
281287
} else {
282288
// not a json string and not a url - assume it's a file path
283-
if (empty($basePath)) {
284-
// no basePath
285-
// - assume source is the absolute path of the file
286-
// - set it's directory as the basePath
287-
$basePath = dirname($source);
289+
if (static::isFileZipSource($source)) {
290+
return static::loadFileZipSource($source);
288291
} else {
289-
// got a basePath
290-
// - try to prepend it to the source and see if such a file exists
291-
// - if not - assume it's an absolute path
292-
$absPath = $basePath.DIRECTORY_SEPARATOR.$source;
293-
if (file_exists($absPath)) {
294-
$source = $absPath;
292+
if (empty($basePath)) {
293+
// no basePath
294+
// - assume source is the absolute path of the file
295+
// - set it's directory as the basePath
296+
$basePath = dirname($source);
297+
} else {
298+
// got a basePath
299+
// - try to prepend it to the source and see if such a file exists
300+
// - if not - assume it's an absolute path
301+
$absPath = $basePath.DIRECTORY_SEPARATOR.$source;
302+
if (file_exists($absPath)) {
303+
$source = $absPath;
304+
}
305+
}
306+
try {
307+
$descriptor = json_decode(file_get_contents($source));
308+
} catch (\Exception $e) {
309+
throw new Exceptions\DatapackageInvalidSourceException(
310+
'Failed to load source: '.json_encode($source).': '.$e->getMessage()
311+
);
295312
}
296-
}
297-
try {
298-
$descriptor = json_decode(file_get_contents($source));
299-
} catch (\Exception $e) {
300-
throw new Exceptions\DatapackageInvalidSourceException(
301-
'Failed to load source: '.json_encode($source).': '.$e->getMessage()
302-
);
303313
}
304314
}
305315
} else {
@@ -310,4 +320,38 @@ protected static function loadSource($source, $basePath)
310320

311321
return (object) ['descriptor' => $descriptor, 'basePath' => $basePath];
312322
}
323+
324+
/**
 * Tells whether an http source points at a zip file.
 *
 * The decision is based only on the last four characters of the source,
 * compared case-insensitively against ".zip".
 *
 * @param string $source
 *
 * @return bool
 */
protected static function isHttpZipSource($source)
{
    $suffix = substr($source, -4);
    $normalizedSuffix = strtolower($suffix);

    return $normalizedSuffix === '.zip';
}
328+
329+
/**
 * Tells whether a filesystem source points at a zip file.
 *
 * Only the trailing four characters are inspected; the check is
 * case-insensitive, so ".ZIP" is accepted as well.
 *
 * @param string $source
 *
 * @return bool
 */
protected static function isFileZipSource($source)
{
    $tail = strtolower(substr($source, -4));
    if ($tail === '.zip') {
        return true;
    }

    return false;
}
333+
334+
/**
 * Downloads a remote zip file to a temporary local file and loads it as a zip datapackage.
 *
 * The temporary file gets a ".zip" suffix and is deleted when the script shuts down.
 *
 * @param string $source url of the zip file
 *
 * @return object the result of loadFileZipSource for the downloaded file
 *
 * @throws Exceptions\DatapackageInvalidSourceException when the zip file cannot be downloaded
 */
protected static function loadHttpZipSource($source)
{
    // tempnam creates a placeholder file without extension; swap it for a .zip name
    $tempfile = tempnam(sys_get_temp_dir(), 'datapackage-php');
    unlink($tempfile);
    $tempfile .= '.zip';
    // registered before the download so a partially written file is cleaned up as well
    register_shutdown_function(function () use ($tempfile) {
        if (file_exists($tempfile)) {
            unlink($tempfile);
        }
    });

    $input = false;
    $output = false;
    $copied = false;
    $reason = 'could not download the zip file';
    try {
        $input = fopen($source, 'r');
        if ($input !== false) {
            $output = fopen($tempfile, 'w');
        }
        if ($input !== false && $output !== false) {
            $copied = stream_copy_to_stream($input, $output);
        }
    } catch (\Exception $e) {
        // NOTE(review): only reached if the host error handler turns warnings into exceptions
        $reason = $e->getMessage();
    }

    // release both handles before the archive is opened for extraction
    if (is_resource($output)) {
        fclose($output);
    }
    if (is_resource($input)) {
        fclose($input);
    }

    if ($copied === false) {
        throw new Exceptions\DatapackageInvalidSourceException(
            'Failed to load source: '.json_encode($source).': '.$reason
        );
    }

    return static::loadFileZipSource($tempfile);
}
343+
344+
/**
 * Extracts a local zip file into a temporary directory and loads the datapackage.json inside it.
 *
 * The temporary directory is used as the base path of the loaded descriptor
 * and is removed with Utils::removeDir when the script shuts down.
 *
 * @param string $source path of the zip file
 *
 * @return object the result of loadSource for the extracted datapackage.json
 *
 * @throws Exceptions\DatapackageInvalidSourceException when the archive has no datapackage.json at its root
 */
protected static function loadFileZipSource($source)
{
    $zippy = Zippy::load();
    // tempnam creates a file; replace it with a directory of the same unique name
    $tempdir = tempnam(sys_get_temp_dir(), 'datapackage-php');
    unlink($tempdir);
    mkdir($tempdir);
    register_shutdown_function(function () use ($tempdir) {
        Utils::removeDir($tempdir);
    });
    $zippy->open($source)->extract($tempdir);
    $descriptorPath = $tempdir."/datapackage.json";
    if (!file_exists($descriptorPath)) {
        throw new Exceptions\DatapackageInvalidSourceException("zip file must contain a datapackage.json file");
    }

    return static::loadSource($descriptorPath, $tempdir);
}
313357
}

src/Resources/BaseResource.php

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,31 @@ public static function handlesDescriptor($descriptor)
3737
return static::handlesProfile(Registry::getResourceValidationProfile($descriptor));
3838
}
3939

40-
public function read()
40+
/**
 * Reads the rows of all data streams of this resource into a single array.
 *
 * Streams exposing a `table` attribute are read through `$dataStream->table->read()`,
 * which receives the given read options with "limit" set to the number of rows
 * still wanted; other streams are iterated directly.
 *
 * @param null|array $readOptions optional read options; the "limit" key caps the
 *                                total number of rows returned across all streams
 *                                (a limit of 0 returns no rows)
 *
 * @return array the rows, at most "limit" of them when a limit was given
 */
public function read($readOptions = null)
{
    $limit = ($readOptions && isset($readOptions["limit"])) ? $readOptions["limit"] : null;
    $rows = [];
    foreach ($this->dataStreams() as $dataStream) {
        // stop before opening another stream once the requested amount was collected
        if ($limit !== null && count($rows) >= $limit) {
            break;
        }
        if (isset($dataStream->table)) {
            // hand the table only the remaining quota, never the already consumed part
            $readOptions["limit"] = ($limit === null) ? null : $limit - count($rows);
            $streamRows = $dataStream->table->read($readOptions);
        } else {
            $streamRows = $dataStream;
        }
        foreach ($streamRows as $row) {
            $rows[] = $row;
            // check right after appending so exactly $limit rows are returned
            if ($limit !== null && count($rows) >= $limit) {
                break;
            }
        }
    }

    return $rows;
}
4967

src/Resources/TabularResource.php

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use frictionlessdata\datapackage\DataStreams\TabularDataStream;
66
use frictionlessdata\datapackage\DataStreams\TabularInlineDataStream;
7+
use frictionlessdata\datapackage\Registry;
78

89
class TabularResource extends DefaultResource
910
{
@@ -41,8 +42,11 @@ protected function getInlineDataStream($data)
4142
]);
4243
}
4344

44-
protected static function handlesProfile($profile)
45+
/**
 * Tells whether this resource class should handle the given descriptor.
 *
 * A descriptor is handled when its validation profile (as resolved by the Registry)
 * is 'tabular-data-resource', or when it declares a format of exactly 'csv' or 'tsv'.
 *
 * @param object $descriptor resource descriptor
 *
 * @return bool
 */
public static function handlesDescriptor($descriptor)
{
    if (Registry::getResourceValidationProfile($descriptor) == 'tabular-data-resource') {
        return true;
    }

    // strict comparison: a non-string format such as 0 or true must not match 'csv'
    return isset($descriptor->format) && in_array($descriptor->format, ['csv', 'tsv'], true);
}
4852
}

tests/DatapackageTest.php

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,23 @@ public function testCreateEditDatapackageDescriptor()
468468
$this->assertEquals("id,name\n1,one\n2,two\n3,three\n", file_get_contents($tempdir.'resource-1.csv'));
469469
}
470470

471+
/**
 * Loads the zip fixture and checks the first two rows of its csv resource.
 */
public function testLoadDatapackageZip()
{
    // the same package is also published remotely:
    // http://datahub.io/opendatafortaxjustice/eucountrydatawb/r/datapackage_zip.zip
    $zipPath = dirname(__FILE__).'/fixtures/datapackage_zip.zip';
    $package = Package::load($zipPath);

    $austria = [
        'jurisdiction' => 'Austria',
        'population in millions' => 8.7474000000000007,
        'GDP in $Billions' => 386.42779999999999,
        'GDP per cap' => 44176.519999999997
    ];
    $belgium = [
        'jurisdiction' => 'Belgium',
        'population in millions' => 11.3482,
        'GDP in $Billions' => 466.3657,
        'GDP per cap' => 41096.160000000003
    ];

    $firstTwoRows = $package->resource('eucountrydatawb_csv')->read(["limit" => 2]);
    $this->assertEquals([$austria, $belgium], $firstTwoRows);
}
487+
471488
public function testStringPath()
472489
{
473490
$package = Package::create(['resources' => [
@@ -540,7 +557,7 @@ public function testDataHubCountryList()
540557
}
541558
}
542559
$this->assertEquals(['data_csv', 'data_json', 'datapackage_zip', 'data'], array_keys($resources));
543-
$this->assertEquals('Name,Code', trim($resources['data_csv'][0]));
560+
$this->assertEquals(["Name" => "Afghanistan", "Code" => "AF"], $resources['data_csv'][0]);
544561

545562
// now, let's try to load it but get it as tabular data
546563
$descriptor = json_decode(file_get_contents(dirname(__FILE__).'/fixtures/datahub-country-list/datapackage.json'));
@@ -569,7 +586,7 @@ public function testCommitteesPackage()
569586
foreach ($resource as $row) {
570587
++$rowNum;
571588
}
572-
$this->assertEquals(706, $rowNum);
589+
$this->assertEquals(702, $rowNum);
573590
++$resourceNum;
574591
}
575592
}

tests/fixtures/datapackage_zip.zip

3.75 KB
Binary file not shown.

0 commit comments

Comments
 (0)