2424version = 7
2525
2626API_BASE = "https://v3-cached.virtualflybrain.org/get_term_info"
27+ STATUS_URL = "https://vfbquery.virtualflybrain.org/status"
2728VFB_BROWSER_BASE = "https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto"
2829
30+ # Throttle settings — stay under 20 concurrent to keep API reliable
31+ MAX_ACTIVE_BEFORE_BACKOFF = 20 # Back off when this many queries are active
32+ BACKOFF_SECONDS = 120 # How long to wait when server is busy
33+ STATUS_CHECK_INTERVAL = 10 # Seconds between status checks while waiting
34+
2935# Known ID prefixes for internal link conversion
3036KNOWN_PREFIXES = (
3137 'FBbt_' , 'FBbi_' , 'FBcv_' , 'FBdv_' , 'FBal' , 'FBrf' , 'FBgn' , 'FBti' , 'FBtp' ,
@@ -52,10 +58,56 @@ def create_session():
5258
5359session = create_session ()
5460
61+ # ─── Server Throttling ───────────────────────────────────────────────────────
62+
def check_server_status():
    """Query the VFBquery status endpoint for its current load.

    Returns:
        A ``(active, waiting)`` tuple of ints read from the ``"active"`` and
        ``"waiting"`` keys of the JSON response (a missing key counts as 0),
        or ``None`` if the request fails, the body is not usable JSON, or
        either value cannot be converted to an integer.
    """
    try:
        resp = session.get(STATUS_URL, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        # Coerce inside the try: a malformed payload (null, non-numeric text,
        # a non-object body) is then reported as "status unknown" here rather
        # than surfacing as a TypeError when the counts are compared later.
        active = int(data.get("active", 0))
        waiting = int(data.get("waiting", 0))
    except Exception as e:
        # Deliberately broad: this is a best-effort probe, and any failure
        # is reported to the caller as None.
        print(f"WARNING: Could not check server status: { e } ")
        return None
    return active, waiting
75+
def wait_for_server_capacity():
    """Block the caller until the status endpoint reports spare capacity.

    Polls check_server_status() in a loop. Each time the status cannot be
    read, any query is reported as waiting, or the active count is at or
    above MAX_ACTIVE_BEFORE_BACKOFF, a message is printed and the loop
    sleeps for BACKOFF_SECONDS before polling again. Returns None as soon
    as none of those conditions hold. There is no upper bound on the total
    time spent waiting.
    """
    while True:
        load = check_server_status()

        if load is None:
            # No reading available: treat as busy rather than risk flooding.
            print(f" Status endpoint unreachable, backing off { BACKOFF_SECONDS } s...")
        else:
            active, waiting = load
            if not (waiting > 0 or active >= MAX_ACTIVE_BEFORE_BACKOFF):
                # Nothing queued and below the threshold: safe to proceed.
                return
            print(f" Server busy: { active } active, { waiting } queued. "
                  f"Backing off { BACKOFF_SECONDS } s... (threshold: { MAX_ACTIVE_BEFORE_BACKOFF } )")

        # Both the unreachable and the busy case pause here, then re-poll.
        time.sleep(BACKOFF_SECONDS)
100+
55101# ─── Data Fetching ───────────────────────────────────────────────────────────
56102
57103def fetch_term_info (term_id ):
58- """Fetch term info from VFBquery API. Returns dict or None on error."""
104+ """Fetch term info from VFBquery API. Returns dict or None on error.
105+
106+ Checks server capacity before making the request to avoid flooding.
107+ """
108+ # Wait until the server isn't overloaded
109+ wait_for_server_capacity ()
110+
59111 try :
60112 resp = session .get (API_BASE , params = {"id" : term_id }, timeout = 9000 )
61113 resp .raise_for_status ()
@@ -526,8 +578,7 @@ def save_terms(ids):
526578 os .remove (old_filename )
527579 print (f'Removed: { old_filename } ' )
528580
529- # Brief pause to avoid overwhelming the API
530- time .sleep (0.5 )
581+ # Throttling is handled by wait_for_server_capacity() in fetch_term_info
531582
532583 except Exception as e :
533584 fail_count += 1
0 commit comments