Skip to content

Commit b87daa0

Browse files
committed
competency: rework pg_import.py utility to use mapknowledge.CompetencyDatabase rather than duplicating much of its functionality.
1 parent e7b1dff commit b87daa0

3 files changed

Lines changed: 329 additions & 272 deletions

File tree

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@ dependencies = [
1919
"rich>=13.9.4",
2020
"pyyaml>=6.0.2",
2121
"asyncpg>=0.30.0",
22-
"flatmapknowledge @ https://github.com/AnatomicMaps/flatmap-knowledge/releases/download/v2.3.0/flatmapknowledge-2.3.0-py3-none-any.whl",
23-
"mapmaker @ https://github.com/AnatomicMaps/flatmap-maker/releases/download/v1.18.0/mapmaker-1.18.0-py3-none-any.whl",
22+
"flatmapknowledge @ https://github.com/AnatomicMaps/flatmap-knowledge/releases/download/v2.4.2/flatmapknowledge-2.4.2-py3-none-any.whl",
23+
"mapknowledge @ https://github.com/AnatomicMaps/map-knowledge/releases/download/v1.2.2/mapknowledge-1.2.2-py3-none-any.whl",
24+
"mapmaker @ https://github.com/AnatomicMaps/flatmap-maker/releases/download/v1.19.2/mapmaker-1.19.2-py3-none-any.whl",
2425
"granian>=2.2.5",
2526
]
2627

tools/pg_import.py

Lines changed: 83 additions & 247 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# Flatmap viewer and annotation tools
44
#
5-
# Copyright (c) 2019-21 David Brooks
5+
# Copyright (c) 2019-25 David Brooks
66
#
77
# Licensed under the Apache License, Version 2.0 (the "License");
88
# you may not use this file except in compliance with the License.
@@ -18,21 +18,21 @@
1818
#
1919
#===============================================================================
2020

21-
import json
21+
from collections import defaultdict
2222
import logging
2323
import os
24-
from typing import Any, Optional
25-
from tqdm import tqdm
26-
import pathlib
2724

2825
#===============================================================================
2926

3027
import psycopg as pg
31-
from landez.sources import MBTilesReader
3228

3329
#===============================================================================
3430

35-
from mapknowledge import KnowledgeStore, NERVE_TYPE
31+
from mapknowledge import NERVE_TYPE
32+
from mapknowledge.competency import clean_knowledge_source, CompetencyDatabase
33+
from mapknowledge.competency import KnowledgeList, KnowledgeSource
34+
from mapserver.settings import settings
35+
from mapserver.utils import json_map_metadata
3636

3737
#===============================================================================
3838

@@ -42,263 +42,96 @@
4242

4343
KNOWLEDGE_USER = os.environ.get('KNOWLEDGE_USER')
4444
KNOWLEDGE_HOST = os.environ.get('KNOWLEDGE_HOST', 'localhost:5432')
45-
FLATMAP_ROOT = os.environ.get('FLATMAP_ROOT')
4645

4746
#===============================================================================
4847

49-
def clean_source(source: str) -> str:
50-
if source.endswith('-npo'):
51-
return source[:-4]
52-
return source
48+
# Used by `json_map_metadata`
5349

54-
#===============================================================================
55-
56-
type KnowledgeDict = dict[str, Any]
57-
58-
class KnowledgeList:
59-
def __init__(self, source: str, knowledge: Optional[list[KnowledgeDict]]=None):
60-
self.__source = clean_source(source)
61-
if knowledge is None:
62-
self.__knowledge: list[KnowledgeDict] = []
63-
else:
64-
self.__knowledge = knowledge
65-
66-
@property
67-
def source(self):
68-
return self.__source
69-
70-
@property
71-
def knowledge(self):
72-
return self.__knowledge
73-
74-
def append(self, knowledge: KnowledgeDict):
75-
self.__knowledge.append(knowledge)
50+
settings['FLATMAP_ROOT'] = os.environ.get('FLATMAP_ROOT', './flatmaps')
7651

7752
#===============================================================================
7853

79-
NODE_PHENOTYPES = [
80-
'ilxtr:hasSomaLocatedIn',
81-
'ilxtr:hasAxonPresynapticElementIn',
82-
'ilxtr:hasAxonSensorySubcellularElementIn',
83-
'ilxtr:hasAxonLeadingToSensorySubcellularElementIn',
84-
'ilxtr:hasAxonLocatedIn',
85-
'ilxtr:hasDendriteLocatedIn',
86-
]
87-
NODE_TYPES = [
88-
NERVE_TYPE,
89-
]
90-
91-
def setup_anatomical_types(cursor):
92-
#==================================
93-
cursor.execute('DELETE FROM anatomical_types at WHERE NOT EXISTS (SELECT 1 FROM path_node_types pt WHERE at.type_id = pt.type_id)')
94-
cursor.executemany('INSERT INTO anatomical_types (type_id, label) VALUES (%s, %s) ON CONFLICT DO NOTHING',
95-
[(type, type) for type in NODE_PHENOTYPES + NODE_TYPES])
96-
97-
#===============================================================================
98-
99-
def delete_source_from_tables(cursor, source: str):
100-
#==================================================
101-
cursor.execute('DELETE FROM path_taxons WHERE source_id=%s', (source, ))
102-
cursor.execute('DELETE FROM feature_evidence WHERE source_id=%s', (source, ))
103-
cursor.execute('DELETE FROM path_edges WHERE source_id=%s', (source, ))
104-
cursor.execute('DELETE FROM path_features WHERE source_id=%s', (source, ))
105-
cursor.execute('DELETE FROM path_node_features WHERE source_id=%s', (source, ))
106-
cursor.execute('DELETE FROM path_forward_connections WHERE source_id=%s', (source, ))
107-
cursor.execute('DELETE FROM path_node_types WHERE source_id=%s', (source, ))
108-
cursor.execute('DELETE FROM path_phenotypes WHERE source_id=%s', (source, ))
109-
cursor.execute('DELETE FROM path_properties WHERE source_id=%s', (source, ))
110-
cursor.execute('DELETE FROM path_nodes WHERE source_id=%s', (source, ))
111-
cursor.execute('DELETE FROM feature_types WHERE source_id=%s', (source, ))
112-
cursor.execute('DELETE FROM feature_terms WHERE source_id=%s', (source, ))
113-
114-
def update_connectivity(cursor, knowledge: KnowledgeList):
115-
#=========================================================
116-
source = knowledge.source
117-
progress_bar = tqdm(total=len(knowledge.knowledge),
118-
unit='records', ncols=80,
119-
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')
120-
for record in knowledge.knowledge:
121-
if source == clean_source(record.get('source', '')):
122-
if (connectivity := record.get('connectivity')) is not None:
123-
path_id = record['id']
124-
125-
# Taxons
126-
taxons = record.get('taxons', ['NCBITaxon:40674'])
127-
cursor.executemany('INSERT INTO taxons (taxon_id) VALUES (%s) ON CONFLICT DO NOTHING',
128-
((taxon,) for taxon in taxons))
129-
130-
# Path taxons
131-
with cursor.copy("COPY path_taxons (source_id, path_id, taxon_id) FROM STDIN") as copy:
132-
for taxon in taxons:
133-
copy.write_row((source, path_id, taxon))
134-
135-
# Evidence
136-
evidence = record.get('references', [])
137-
cursor.executemany('INSERT INTO evidence (evidence_id) VALUES (%s) ON CONFLICT DO NOTHING',
138-
((evidence,) for evidence in evidence))
139-
140-
# Path evidence
141-
with cursor.copy("COPY feature_evidence (source_id, term_id, evidence_id) FROM STDIN") as copy:
142-
for evidence_id in evidence:
143-
copy.write_row((source, path_id, evidence_id))
144-
145-
# Nodes
146-
nodes = set(json.dumps(node) for (node, _) in connectivity) | set(json.dumps(node) for (_, node) in connectivity)
147-
cursor.executemany('INSERT INTO path_nodes (source_id, path_id, node_id) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING',
148-
((source, path_id, node,) for node in nodes))
149-
150-
# Node features
151-
node_features = [ (source, path_id, node, feature)
152-
for (node, features) in [(node, json.loads(node)) for node in nodes]
153-
for feature in [features[0]] + features[1] ]
154-
cursor.executemany('INSERT INTO path_node_features (source_id, path_id, node_id, feature_id) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING',
155-
node_features)
156-
157-
# Path edges
158-
path_nodes = [ (source, path_id, json.dumps(node_0), json.dumps(node_1)) for (node_0, node_1) in connectivity ]
159-
with cursor.copy("COPY path_edges (source_id, path_id, node_0, node_1) FROM STDIN") as copy:
160-
for row in path_nodes:
161-
copy.write_row(row)
162-
163-
# Path features
164-
path_features = [(source, path_id, feature) for feature in set([nf[3] for nf in node_features])]
165-
with cursor.copy("COPY path_features (source_id, path_id, feature_id) FROM STDIN") as copy:
166-
for row in path_features:
167-
copy.write_row(row)
168-
169-
# Forward connections
170-
forward_connections = [(source, path_id, forward_path) for forward_path in record.get('forward-connections', [])]
171-
with cursor.copy("COPY path_forward_connections (source_id, path_id, forward_path_id) FROM STDIN") as copy:
172-
for row in forward_connections:
173-
copy.write_row(row)
174-
175-
# Path node types
176-
node_types = []
177-
node_phenotypes = record.get('node-phenotypes', {})
178-
for type, nodes in node_phenotypes.items():
179-
node_types.extend([(source, path_id, json.dumps(node), type)
180-
for node in nodes])
181-
node_types.extend([(source, path_id, json.dumps(node), NERVE_TYPE)
182-
for node in record.get('nerves', [])])
183-
with cursor.copy("COPY path_node_types (source_id, path_id, node_id, type_id) FROM STDIN") as copy:
184-
for row in node_types:
185-
copy.write_row(row)
186-
187-
# Path phenotypes
188-
with cursor.copy("COPY path_phenotypes (source_id, path_id, phenotype) FROM STDIN") as copy:
189-
for phenotype in record.get('phenotypes', []):
190-
copy.write_row((source, path_id, phenotype))
191-
192-
# General path properties
193-
cursor.execute('INSERT INTO path_properties (source_id, path_id, biological_sex, alert, disconnected) VALUES (%s, %s, %s, %s, %s)',
194-
(source, path_id, record.get('biologicalSex'), record.get('alert'), record.get('pathDisconnected')))
195-
196-
progress_bar.update(1)
197-
progress_bar.close()
198-
199-
def update_features(cursor, knowledge: KnowledgeList):
200-
#=====================================================
201-
source = knowledge.source
202-
cursor.execute('DELETE FROM feature_terms WHERE source_id=%s', (source, ))
203-
204-
for record in knowledge.knowledge:
205-
if source == clean_source(record.get('source', '')):
206-
207-
# Feature terms
208-
with cursor.copy("COPY feature_terms (source_id, term_id, label, description) FROM STDIN") as copy:
209-
copy.write_row([source, record['id'], record.get('label'), record.get('long-label')])
210-
211-
# Feature types
212-
with cursor.copy("COPY feature_types (source_id, term_id, type_id) FROM STDIN") as copy:
213-
if (term_type:=record.get('type')) is not None:
214-
copy.write_row([source, record['id'], term_type])
215-
216-
def update_knowledge_source(cursor, source):
217-
#===========================================
218-
cursor.execute('INSERT INTO knowledge_sources (source_id) VALUES (%s) ON CONFLICT DO NOTHING', (source,))
219-
220-
#===============================================================================
221-
222-
def pg_import(uuid):
223-
#=======================================
224-
knowledge = map_knowledge(uuid)
225-
user = f'{KNOWLEDGE_USER}@' if KNOWLEDGE_USER else ''
226-
with pg.connect(f'postgresql://{user}{KNOWLEDGE_HOST}/{PG_DATABASE}') as db:
227-
with db.cursor() as cursor:
228-
delete_source_from_tables(cursor, knowledge.source)
229-
setup_anatomical_types(cursor)
230-
update_knowledge_source(cursor, knowledge.source)
231-
update_features(cursor, knowledge)
232-
update_connectivity(cursor, knowledge)
233-
db.commit()
234-
235-
#===============================================================================
236-
237-
def map_knowledge(uuid) -> KnowledgeList:
238-
#========================================
239-
mbtiles = pathlib.Path(FLATMAP_ROOT) / uuid / 'index.mbtiles'
240-
if not mbtiles.exists():
241-
raise FileNotFoundError(f"MBTiles file not found at: {mbtiles}")
242-
243-
store = KnowledgeStore(
244-
store_directory = FLATMAP_ROOT,
245-
knowledge_base = DEFAULT_STORE,
246-
read_only = False,
247-
use_sckan = False
248-
)
249-
250-
reader = MBTilesReader(mbtiles)
251-
252-
# Load metadata
253-
row = reader._query("SELECT value FROM metadata WHERE name='metadata'").fetchone()
254-
metadata = json.loads(row[0])
255-
if uuid != metadata.get('uuid'):
54+
def get_map_knowledge(map_uuid: str, competency_db: CompetencyDatabase) -> KnowledgeList:
55+
#========================================================================================
56+
metadata = json_map_metadata(map_uuid, 'metadata')
57+
if map_uuid != metadata.get('uuid'):
25658
raise IOError("Flatmap source UUID doesn't match the provided UUID.")
25759

25860
sckan_release = metadata.get('connectivity', {}).get('npo', {}).get('release')
259-
260-
# Load pathways
261-
row = reader._query("SELECT value FROM metadata WHERE name='pathways'").fetchone()
262-
pathways = json.loads(row[0]).get('paths', {})
61+
map_knowledge_source = clean_knowledge_source(sckan_release)
62+
63+
annotations = json_map_metadata(map_uuid, 'annotations')
64+
annotated_features = { models: feature
65+
for feature in annotations.values()
66+
if (models := feature.get('models')) is not None }
67+
descriptions = { row[0]: row[1]
68+
for row in competency_db.execute(
69+
'select term_id, description from feature_terms where source_id=%s', (map_knowledge_source,)) }
70+
path_properties = {}
71+
for row in competency_db.execute(
72+
'select path_id, alert, biological_sex, disconnected from path_properties where source_id=%s', (map_knowledge_source,)):
73+
properties = {}
74+
if row[1] is not None:
75+
properties['alert'] = row[1]
76+
if row[2] is not None:
77+
properties['biologicalSex'] = row[2]
78+
if row[3] is not None:
79+
properties['pathDisconnected'] = row[3]
80+
path_properties[row[0]] = properties
81+
82+
path_evidence = defaultdict(list)
83+
for row in competency_db.execute(
84+
'select term_id, evidence_id from feature_evidence where source_id=%s', (map_knowledge_source,)):
85+
path_evidence[row[0]].append(row[1])
86+
87+
path_phenotypes = defaultdict(list)
88+
for row in competency_db.execute(
89+
'select path_id, phenotype from path_phenotypes where source_id=%s', (map_knowledge_source,)):
90+
path_phenotypes[row[0]].append(row[1])
91+
92+
# Collect all map knowledge
26393
knowledge_terms = {}
26494

265-
for path_id, path in pathways.items():
266-
if 'connectivity' not in path:
95+
# Path features (i.e. those with connectivity)
96+
pathways = json_map_metadata(map_uuid, 'pathways').get('paths', {})
97+
for path_id, path_knowledge in pathways.items():
98+
if 'connectivity' not in path_knowledge:
26799
continue
268-
269-
db_knowledge = store.entity_knowledge(path_id, sckan_release)
100+
annotations = annotated_features.get(path_id, {})
101+
properties = path_properties.get(path_id, {})
270102
knowledge_terms[path_id] = {
271103
'id': path_id,
272-
'label': db_knowledge['label'],
273-
'long-label': db_knowledge['long-label'],
274-
'connectivity': path['connectivity'],
275-
'taxons': [metadata.get('taxon', '')],
276-
'forward-connections': path['forward-connections'],
277-
'node-phenotypes': path['node-phenotypes'],
278-
'nerves': path.get('node-nerves', []),
279-
'pathDisconnected': db_knowledge['pathDisconnected'],
280-
'phenotypes': db_knowledge.get('phenotypes', []),
281-
'source': uuid,
282-
'references': db_knowledge.get('references', []),
283-
'alert': db_knowledge.get('alert', [])
104+
'source': map_uuid,
105+
'label': annotations['label'],
106+
'long-label': descriptions[path_id],
107+
'connectivity': path_knowledge['connectivity'],
108+
'taxons': annotations.get('taxons', []),
109+
'forward-connections': path_knowledge['forward-connections'],
110+
'node-phenotypes': path_knowledge['node-phenotypes'],
111+
'nerves': path_knowledge.get('node-nerves', []),
112+
'phenotypes': path_phenotypes.get(path_id, []),
113+
'references': path_evidence.get(path_id, []),
284114
}
285-
286-
# Load annotations
287-
row = reader._query("SELECT value FROM metadata WHERE name='annotations'").fetchone()
288-
annotations = json.loads(row[0])
289-
290-
for feature in annotations.values():
291-
model = feature.get('models')
292-
if model and model not in knowledge_terms:
293-
db_knowledge = store.entity_knowledge(model, sckan_release)
294-
knowledge_terms[model] = {
295-
'id': model,
296-
'label': db_knowledge['label'],
297-
'source': uuid,
298-
**({'type': db_knowledge['type']} if 'type' in db_knowledge else {})
115+
if 'alert' in properties:
116+
knowledge_terms[path_id]['alert'] = properties['alert']
117+
if 'biologicalSex' in properties:
118+
knowledge_terms[path_id]['biologicalSex'] = properties['biologicalSex']
119+
if 'pathDisconnected' in properties:
120+
knowledge_terms[path_id]['pathDisconnected'] = properties['pathDisconnected']
121+
122+
# Non-path features with an anatomical term
123+
for feature_id, properties in annotated_features.items():
124+
if feature_id not in knowledge_terms:
125+
knowledge_terms[feature_id] = {
126+
'id': feature_id,
127+
'source': map_uuid,
128+
'label': properties['label'],
129+
'long-label': descriptions[feature_id],
299130
}
131+
if properties.get('type') == 'nerve':
132+
knowledge_terms[feature_id]['type'] = NERVE_TYPE
300133

301-
return KnowledgeList(uuid, list(knowledge_terms.values()))
134+
return KnowledgeList(KnowledgeSource(map_uuid, sckan_release, metadata['name']), list(knowledge_terms.values()))
302135

303136
#===============================================================================
304137

@@ -313,7 +146,10 @@ def main():
313146

314147
if not args.quiet:
315148
logging.basicConfig(level=logging.INFO)
316-
pg_import(args.uuid)
149+
150+
competency_db = CompetencyDatabase(KNOWLEDGE_USER, KNOWLEDGE_HOST, PG_DATABASE)
151+
knowledge = get_map_knowledge(args.uuid, competency_db)
152+
competency_db.import_knowledge(knowledge)
317153

318154
#===============================================================================
319155

0 commit comments

Comments (0)