Skip to content

Commit f6ba749

Browse files
authored
Merge pull request #42 from INCATools/gzip-support
Making gzipped files the default for S3 upload/download
2 parents 0dd6ac0 + aa323f3 commit f6ba749

4 files changed

Lines changed: 67 additions & 6 deletions

File tree

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,16 @@ SELECTED_ONTS = obi mondo go envo ro hp mp zfa wbphenotype ecto upheno uberon_cm
1313

1414
TEST_ONTOLOGIES = go-nucleus robot-example
1515

16-
all: $(patsubst %,all-%,$(ALL_OBO_ONTS))
16+
all: build_all stage_all
17+
build_all: $(patsubst %,all-%,$(ALL_OBO_ONTS))
18+
stage_all: $(patsubst %,stage/%.db.gz,$(ALL_OBO_ONTS))
19+
1720
selected: $(patsubst %,all-%,$(SELECTED_ONTS))
1821

1922
all-%: db/%.db
2023
sqlite3 $< "SELECT COUNT(*) FROM statements"
24+
# Compress db/<ont>.db into stage/<ont>.db.gz. The output directory is created on demand, and the archive is written to a temp name then renamed so a partial file never appears under the final name.
stage/%.db.gz: db/%.db
25+
mkdir -p $(@D) && gzip -c $< > $@.tmp && mv $@.tmp $@
2126

2227
# INSTALL
2328
include install.Makefile
@@ -49,6 +54,7 @@ realclean-%:
4954
# Prefixes
5055
# ---
5156
# TODO: sync with bioregistry
57+
# NOTE: this is now managed in build folder
5258

5359
build_prefixes: $(PREFIX_DIR)/prefixes.csv
5460

@@ -197,5 +203,5 @@ bin/%:
197203
DATE = $(shell date -u +"%Y-%m-%d")
198204

199205
s3-deploy:
200-
aws s3 sync db s3://bbop-sqlite --acl public-read && \
201-
aws s3 sync db s3://bbop-sqlite/releases/$(DATE) --acl public-read
206+
aws s3 sync stage s3://bbop-sqlite --acl public-read && \
207+
aws s3 sync stage s3://bbop-sqlite/releases/$(DATE) --acl public-read

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "semsql"
3-
version = "0.1.6"
3+
version = "0.1.7"
44
description = ""
55
authors = ["cmungall <cjm@berkeleybop.org>"]
66

src/semsql/builder/builder.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import gzip
12
import logging
23
import os
4+
import shutil
35
import subprocess
46
from dataclasses import field
57
from pathlib import Path
@@ -70,9 +72,16 @@ def download_obo_sqlite(ontology: str, destination: str):
7072
:param destination:
7173
:return:
7274
"""
73-
url = f'https://s3.amazonaws.com/bbop-sqlite/{ontology}.db'
75+
db = f'{ontology}.db'
76+
url = f'https://s3.amazonaws.com/bbop-sqlite/{db}.gz'
7477
r = requests.get(url, allow_redirects=True)
75-
open(destination, 'wb').write(r.content)
78+
destination_gzip = f'{destination}.gz'
79+
open(destination_gzip, 'wb').write(r.content)
80+
with gzip.open(destination_gzip, 'rb') as f_in:
81+
with open(destination, 'wb') as f_out:
82+
shutil.copyfileobj(f_in, f_out)
83+
os.remove(destination_gzip)
84+
7685

7786

7887
def connect(owl_file: str):

src/semsql/linkml/similarity.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
name: semsql_similarity
2+
description: Module for representing and calculating similarities
3+
title: Semantic similarity module
4+
id: https://w3id.org/semsql/similarity
5+
imports:
6+
- rdf
7+
- relation_graph
8+
license: https://creativecommons.org/publicdomain/zero/1.0/
9+
prefixes:
10+
semsql_similarity: https://w3id.org/semsql/similarity
11+
linkml: https://w3id.org/linkml/
12+
default_curi_maps:
13+
- semweb_context
14+
default_prefix: semsql_similarity
15+
default_range: string
16+
17+
classes:
18+
node_pairwise_similarity:
19+
abstract: true
20+
slots:
21+
- node1
22+
- node2
23+
node_pairwise_graph_similarity:
24+
is_a: node_pairwise_similarity
25+
abstract: true
26+
slots:
27+
- num_ancestors
28+
- predicate1
29+
- predicate2
30+
node_pairwise_overlap:
31+
is_a: node_pairwise_graph_similarity
32+
comments:
33+
- |-
34+
sqlview>>
35+
SELECT
36+
e1.subject AS node1,
37+
e2.subject AS node2,
38+
e1.predicate AS predicate1,
39+
e2.predicate AS predicate2,
40+
COUNT(DISTINCT e1.object) AS num_ancestors
41+
FROM entailed_edge AS e1,
42+
entailed_edge AS e2
43+
WHERE e1.object = e2.object
44+
GROUP BY e1.subject, e2.subject, e1.predicate, e2.predicate
45+
46+

0 commit comments

Comments
 (0)