22#
33# Flatmap viewer and annotation tools
44#
5- # Copyright (c) 2019-21 David Brooks
5+ # Copyright (c) 2019-25 David Brooks
66#
77# Licensed under the Apache License, Version 2.0 (the "License");
88# you may not use this file except in compliance with the License.
1818#
1919#===============================================================================
2020
21- import json
21+ from collections import defaultdict
2222import logging
2323import os
24- from typing import Any , Optional
25- from tqdm import tqdm
26- import pathlib
2724
2825#===============================================================================
2926
3027import psycopg as pg
31- from landez .sources import MBTilesReader
3228
3329#===============================================================================
3430
35- from mapknowledge import KnowledgeStore , NERVE_TYPE
31+ from mapknowledge import NERVE_TYPE
32+ from mapknowledge .competency import clean_knowledge_source , CompetencyDatabase
33+ from mapknowledge .competency import KnowledgeList , KnowledgeSource
34+ from mapserver .settings import settings
35+ from mapserver .utils import json_map_metadata
3636
3737#===============================================================================
3838
4242
# User name and `host:port` of the knowledge database server, taken from the
# environment; the user is optional and the host defaults to a local server
KNOWLEDGE_USER = os.getenv('KNOWLEDGE_USER')
KNOWLEDGE_HOST = os.getenv('KNOWLEDGE_HOST', 'localhost:5432')
45- FLATMAP_ROOT = os .environ .get ('FLATMAP_ROOT' )
4645
4746#===============================================================================
4847
def clean_source(source: str) -> str:
    """Return ``source`` with a trailing ``-npo`` removed, if it has one.

    Any other source identifier is returned unchanged.
    """
    return source.removesuffix('-npo')
48+ # Used by `json_map_metadata`
5349
54- #===============================================================================
55-
56- type KnowledgeDict = dict [str , Any ]
57-
58- class KnowledgeList :
59- def __init__ (self , source : str , knowledge : Optional [list [KnowledgeDict ]]= None ):
60- self .__source = clean_source (source )
61- if knowledge is None :
62- self .__knowledge : list [KnowledgeDict ] = []
63- else :
64- self .__knowledge = knowledge
65-
66- @property
67- def source (self ):
68- return self .__source
69-
70- @property
71- def knowledge (self ):
72- return self .__knowledge
73-
74- def append (self , knowledge : KnowledgeDict ):
75- self .__knowledge .append (knowledge )
# Fall back to a local `./flatmaps` directory when FLATMAP_ROOT isn't set
# in the environment
settings['FLATMAP_ROOT'] = os.getenv('FLATMAP_ROOT', './flatmaps')
7651
7752#===============================================================================
7853
# Node phenotype predicates that are registered as anatomical types
NODE_PHENOTYPES = [
    'ilxtr:hasSomaLocatedIn',
    'ilxtr:hasAxonPresynapticElementIn',
    'ilxtr:hasAxonSensorySubcellularElementIn',
    'ilxtr:hasAxonLeadingToSensorySubcellularElementIn',
    'ilxtr:hasAxonLocatedIn',
    'ilxtr:hasDendriteLocatedIn',
]
# Other node types that are registered as anatomical types
NODE_TYPES = [
    NERVE_TYPE,
]

def setup_anatomical_types(cursor):
#==================================
    """Make the ``anatomical_types`` table hold the known node types.

    Types that no row of ``path_node_types`` refers to are deleted first,
    then every phenotype and node type is inserted, skipping those already
    present. Each type's label is its identifier.

    :param cursor: database cursor used to execute the statements
    """
    cursor.execute('DELETE FROM anatomical_types at WHERE NOT EXISTS (SELECT 1 FROM path_node_types pt WHERE at.type_id = pt.type_id)')
    type_rows = [(type_id, type_id) for type_id in NODE_PHENOTYPES + NODE_TYPES]
    cursor.executemany('INSERT INTO anatomical_types (type_id, label) VALUES (%s, %s) ON CONFLICT DO NOTHING',
                       type_rows)
96-
97- #===============================================================================
98-
def delete_source_from_tables(cursor, source: str):
#==================================================
    """Delete all rows belonging to ``source`` from the per-source tables.

    :param cursor: database cursor used to execute the deletions
    :param source: the ``source_id`` whose rows are removed
    """
    # Rows are deleted table by table, in exactly this order
    source_tables = (
        'path_taxons',
        'feature_evidence',
        'path_edges',
        'path_features',
        'path_node_features',
        'path_forward_connections',
        'path_node_types',
        'path_phenotypes',
        'path_properties',
        'path_nodes',
        'feature_types',
        'feature_terms',
    )
    for table in source_tables:
        # Table names are the constants above; the source is passed as a parameter
        cursor.execute(f'DELETE FROM {table} WHERE source_id=%s', (source, ))
113-
def _copy_rows(cursor, statement: str, rows):
#============================================
    """Stream ``rows`` into the database using the COPY ``statement``."""
    with cursor.copy(statement) as copy:
        for row in rows:
            copy.write_row(row)

def _import_path_connectivity(cursor, source: str, record: dict, connectivity):
#==============================================================================
    """Store one path's connectivity, and its associated properties.

    :param cursor: database cursor used for all statements
    :param source: the ``source_id`` the rows are stored under
    :param record: the path's knowledge record
    :param connectivity: the record's ``connectivity``, a sequence of
                         ``(node_0, node_1)`` edges
    """
    path_id = record['id']

    # Taxons
    taxons = record.get('taxons', ['NCBITaxon:40674'])
    cursor.executemany('INSERT INTO taxons (taxon_id) VALUES (%s) ON CONFLICT DO NOTHING',
                       ((taxon,) for taxon in taxons))

    # Path taxons
    _copy_rows(cursor, "COPY path_taxons (source_id, path_id, taxon_id) FROM STDIN",
               ((source, path_id, taxon) for taxon in taxons))

    # Evidence
    evidence = record.get('references', [])
    cursor.executemany('INSERT INTO evidence (evidence_id) VALUES (%s) ON CONFLICT DO NOTHING',
                       ((evidence_id,) for evidence_id in evidence))

    # Path evidence
    _copy_rows(cursor, "COPY feature_evidence (source_id, term_id, evidence_id) FROM STDIN",
               ((source, path_id, evidence_id) for evidence_id in evidence))

    # Nodes are identified by their JSON encoding, with each distinct node
    # stored once
    node_ids = {json.dumps(node)
                for (node_0, node_1) in connectivity
                for node in (node_0, node_1)}
    cursor.executemany('INSERT INTO path_nodes (source_id, path_id, node_id) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING',
                       ((source, path_id, node_id) for node_id in node_ids))

    # Node features: a decoded node is `[feature, [feature, ...]]`, so its
    # features are the first element followed by those in the second
    node_features = []
    for node_id in node_ids:
        node = json.loads(node_id)
        node_features.extend((source, path_id, node_id, feature)
                             for feature in [node[0]] + node[1])
    cursor.executemany('INSERT INTO path_node_features (source_id, path_id, node_id, feature_id) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING',
                       node_features)

    # Path edges
    _copy_rows(cursor, "COPY path_edges (source_id, path_id, node_0, node_1) FROM STDIN",
               [(source, path_id, json.dumps(node_0), json.dumps(node_1))
                for (node_0, node_1) in connectivity])

    # Path features
    _copy_rows(cursor, "COPY path_features (source_id, path_id, feature_id) FROM STDIN",
               [(source, path_id, feature)
                for feature in {node_feature[3] for node_feature in node_features}])

    # Forward connections
    _copy_rows(cursor, "COPY path_forward_connections (source_id, path_id, forward_path_id) FROM STDIN",
               [(source, path_id, forward_path)
                for forward_path in record.get('forward-connections', [])])

    # Path node types: nodes listed by phenotype, followed by nerve nodes
    node_types = [(source, path_id, json.dumps(node), type_id)
                  for (type_id, typed_nodes) in record.get('node-phenotypes', {}).items()
                  for node in typed_nodes]
    node_types.extend((source, path_id, json.dumps(node), NERVE_TYPE)
                      for node in record.get('nerves', []))
    _copy_rows(cursor, "COPY path_node_types (source_id, path_id, node_id, type_id) FROM STDIN",
               node_types)

    # Path phenotypes
    _copy_rows(cursor, "COPY path_phenotypes (source_id, path_id, phenotype) FROM STDIN",
               ((source, path_id, phenotype) for phenotype in record.get('phenotypes', [])))

    # General path properties
    cursor.execute('INSERT INTO path_properties (source_id, path_id, biological_sex, alert, disconnected) VALUES (%s, %s, %s, %s, %s)',
                   (source, path_id, record.get('biologicalSex'), record.get('alert'), record.get('pathDisconnected')))

def update_connectivity(cursor, knowledge: KnowledgeList):
#=========================================================
    """Import the connectivity of the path records in ``knowledge``.

    Only records whose cleaned ``source`` matches that of the knowledge list,
    and which have ``connectivity``, are imported. A progress bar advances
    once per record, whether or not the record is imported.

    :param cursor: database cursor used for all statements
    :param knowledge: the knowledge records to import
    """
    source = knowledge.source
    # As a context manager the progress bar is closed even if a database
    # operation raises an exception
    with tqdm(total=len(knowledge.knowledge),
              unit='records', ncols=80,
              bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}') as progress_bar:
        for record in knowledge.knowledge:
            if source == clean_source(record.get('source', '')):
                if (connectivity := record.get('connectivity')) is not None:
                    _import_path_connectivity(cursor, source, record, connectivity)
            progress_bar.update(1)
198-
def update_features(cursor, knowledge: KnowledgeList):
#=====================================================
    """Replace the feature terms of the knowledge's source.

    Existing ``feature_terms`` rows of the source are deleted. A term row is
    then stored for every record whose cleaned ``source`` matches that of the
    knowledge list, along with a ``feature_types`` row for those records
    that have a ``type``.

    :param cursor: database cursor used for all statements
    :param knowledge: the knowledge records to import
    """
    source = knowledge.source
    cursor.execute('DELETE FROM feature_terms WHERE source_id=%s', (source, ))

    records = [record for record in knowledge.knowledge
                if source == clean_source(record.get('source', ''))]

    # A single COPY per table is used for all records, instead of starting
    # two COPY operations for each record

    # Feature terms
    with cursor.copy("COPY feature_terms (source_id, term_id, label, description) FROM STDIN") as copy:
        for record in records:
            copy.write_row([source, record['id'], record.get('label'), record.get('long-label')])

    # Feature types
    with cursor.copy("COPY feature_types (source_id, term_id, type_id) FROM STDIN") as copy:
        for record in records:
            if (term_type := record.get('type')) is not None:
                copy.write_row([source, record['id'], term_type])
215-
def update_knowledge_source(cursor, source):
#===========================================
    """Record ``source`` in ``knowledge_sources``, unless it is already there.

    :param cursor: database cursor used to execute the insert
    :param source: the ``source_id`` to record
    """
    insert_source = 'INSERT INTO knowledge_sources (source_id) VALUES (%s) ON CONFLICT DO NOTHING'
    cursor.execute(insert_source, (source,))
219-
220- #===============================================================================
221-
def pg_import(uuid):
#===================
    """Import the knowledge of the flatmap identified by ``uuid`` into PostgreSQL.

    Rows already stored for the map's source are deleted before its
    features and connectivity are imported, and the changes are then
    committed.

    :param uuid: the flatmap's UUID
    """
    map_source_knowledge = map_knowledge(uuid)
    source_id = map_source_knowledge.source

    # The user part of the connection URL is left out when no user is configured
    user_prefix = f'{KNOWLEDGE_USER}@' if KNOWLEDGE_USER else ''
    database_url = f'postgresql://{user_prefix}{KNOWLEDGE_HOST}/{PG_DATABASE}'

    with pg.connect(database_url) as db:
        with db.cursor() as cursor:
            delete_source_from_tables(cursor, source_id)
            setup_anatomical_types(cursor)
            update_knowledge_source(cursor, source_id)
            update_features(cursor, map_source_knowledge)
            update_connectivity(cursor, map_source_knowledge)
        db.commit()
234-
235- #===============================================================================
236-
237- def map_knowledge (uuid ) -> KnowledgeList :
238- #========================================
239- mbtiles = pathlib .Path (FLATMAP_ROOT ) / uuid / 'index.mbtiles'
240- if not mbtiles .exists ():
241- raise FileNotFoundError (f"MBTiles file not found at: { mbtiles } " )
242-
243- store = KnowledgeStore (
244- store_directory = FLATMAP_ROOT ,
245- knowledge_base = DEFAULT_STORE ,
246- read_only = False ,
247- use_sckan = False
248- )
249-
250- reader = MBTilesReader (mbtiles )
251-
252- # Load metadata
253- row = reader ._query ("SELECT value FROM metadata WHERE name='metadata'" ).fetchone ()
254- metadata = json .loads (row [0 ])
255- if uuid != metadata .get ('uuid' ):
def get_map_knowledge(map_uuid: str, competency_db: CompetencyDatabase) -> KnowledgeList:
#========================================================================================
    """Collect the knowledge about a flatmap's features as a ``KnowledgeList``.

    Connectivity comes from the map's ``pathways`` metadata, labels and
    taxons from its ``annotations``. Descriptions, evidence, phenotypes and
    general path properties are read from the competency database, using the
    knowledge source derived from the map's NPO release.

    A feature with no annotated label, or with no description in the
    database, gets ``None`` for that property instead of raising ``KeyError``.

    :param map_uuid: the flatmap's UUID
    :param competency_db: the database queried for existing knowledge
    :return: the knowledge of the map's paths and annotated features
    :raises IOError: if the UUID in the map's metadata isn't ``map_uuid``
    """
    metadata = json_map_metadata(map_uuid, 'metadata')
    if map_uuid != metadata.get('uuid'):
        raise IOError("Flatmap source UUID doesn't match the provided UUID.")

    sckan_release = metadata.get('connectivity', {}).get('npo', {}).get('release')
    map_knowledge_source = clean_knowledge_source(sckan_release)

    # Annotated features, keyed by the term that they model
    annotated_features = { models: feature
        for feature in json_map_metadata(map_uuid, 'annotations').values()
            if (models := feature.get('models')) is not None }

    descriptions = { row[0]: row[1]
        for row in competency_db.execute(
            'select term_id, description from feature_terms where source_id=%s', (map_knowledge_source,)) }

    # Only properties with a value are kept, under the names used in
    # knowledge records, so that they can be merged directly into a path's
    # knowledge. Names are in the order of the query's value columns.
    property_names = ('alert', 'biologicalSex', 'pathDisconnected')
    path_properties = {}
    for row in competency_db.execute(
        'select path_id, alert, biological_sex, disconnected from path_properties where source_id=%s', (map_knowledge_source,)):
        path_properties[row[0]] = { name: value
            for (name, value) in zip(property_names, (row[1], row[2], row[3]))
                if value is not None }

    path_evidence = defaultdict(list)
    for row in competency_db.execute(
        'select term_id, evidence_id from feature_evidence where source_id=%s', (map_knowledge_source,)):
        path_evidence[row[0]].append(row[1])

    path_phenotypes = defaultdict(list)
    for row in competency_db.execute(
        'select path_id, phenotype from path_phenotypes where source_id=%s', (map_knowledge_source,)):
        path_phenotypes[row[0]].append(row[1])

    # Collect all map knowledge
    knowledge_terms = {}

    # Path features (i.e. those with connectivity)
    pathways = json_map_metadata(map_uuid, 'pathways').get('paths', {})
    for path_id, path_knowledge in pathways.items():
        if 'connectivity' not in path_knowledge:
            continue
        # A path might not have an annotated feature
        path_feature = annotated_features.get(path_id, {})
        knowledge_terms[path_id] = {
            'id': path_id,
            'source': map_uuid,
            'label': path_feature.get('label'),
            'long-label': descriptions.get(path_id),
            'connectivity': path_knowledge['connectivity'],
            'taxons': path_feature.get('taxons', []),
            'forward-connections': path_knowledge['forward-connections'],
            'node-phenotypes': path_knowledge['node-phenotypes'],
            'nerves': path_knowledge.get('node-nerves', []),
            'phenotypes': path_phenotypes.get(path_id, []),
            'references': path_evidence.get(path_id, []),
            **path_properties.get(path_id, {}),
        }

    # Non-path features with an anatomical term
    for feature_id, feature in annotated_features.items():
        if feature_id not in knowledge_terms:
            feature_knowledge = {
                'id': feature_id,
                'source': map_uuid,
                'label': feature.get('label'),
                'long-label': descriptions.get(feature_id),
            }
            if feature.get('type') == 'nerve':
                feature_knowledge['type'] = NERVE_TYPE
            knowledge_terms[feature_id] = feature_knowledge

    return KnowledgeList(KnowledgeSource(map_uuid, sckan_release, metadata['name']), list(knowledge_terms.values()))
302135
303136#===============================================================================
304137
@@ -313,7 +146,10 @@ def main():
313146
314147 if not args .quiet :
315148 logging .basicConfig (level = logging .INFO )
316- pg_import (args .uuid )
149+
150+ competency_db = CompetencyDatabase (KNOWLEDGE_USER , KNOWLEDGE_HOST , PG_DATABASE )
151+ knowledge = get_map_knowledge (args .uuid , competency_db )
152+ competency_db .import_knowledge (knowledge )
317153
318154#===============================================================================
319155
0 commit comments