@@ -104,47 +104,70 @@ def __setstate__(self, state) -> None: # type: ignore[no-untyped-def]
104104 self ._call_thread_pool = None
105105 self ._call_thread_pool_lock = threading .RLock ()
106106
def _cleanup_owned_lcm(self) -> None:
    """Dispose of the internally-owned LCM instance.

    Does nothing when an LCM instance was supplied through ``self.config.lcm``:
    that instance was not created here, so it is left alone.

    Otherwise the reference held in ``self.l`` is dropped while holding
    ``self._l_lock``. Calling this again once ``self.l`` is already ``None``
    is a no-op.
    """
    if self.config.lcm:
        # Externally supplied instance: not ours to dispose of.
        return

    with self._l_lock:
        # Rebinding to None releases our reference to the instance; a separate
        # ``del self.l`` right before the assignment would be redundant.
        if self.l is not None:
            self.l = None
116+
def start(self) -> None:
    """Start the LCM handling thread unless one is already running.

    If ``self._thread`` exists and is alive the call returns immediately and
    changes nothing. Otherwise ``self.l`` is (re)initialized when it is
    ``None`` (e.g., after unpickling), the stop event is cleared, and a new
    daemon thread running ``self._lcm_loop`` is started.
    """
    # Everything happens under the lock so the "already running" check and
    # the thread creation cannot interleave with a concurrent start().
    with self._l_lock:
        if self._thread is not None and self._thread.is_alive():
            return

        # Reinitialize LCM if it's None (e.g., after unpickling)
        if self.l is None:
            if self.config.lcm:
                # Prefer the instance supplied through the configuration.
                self.l = self.config.lcm
            elif self.config.url:
                self.l = lcm_mod.LCM(self.config.url)
            else:
                self.l = lcm_mod.LCM()

        self._stop_event.clear()
        self._thread = threading.Thread(target=self._lcm_loop, daemon=True)
        self._thread.start()
119132
def _lcm_loop(self) -> None:
    """LCM message handling loop.

    Repeatedly calls ``handle_timeout(_LCM_LOOP_TIMEOUT)`` on the current LCM
    instance until the stop event is set or ``self.l`` becomes ``None``.
    Exceptions raised while handling are printed with their traceback and the
    loop keeps going.

    On exit, however the loop ended, the owned LCM instance is cleaned up via
    ``self._cleanup_owned_lcm()`` and ``self._thread`` is reset to ``None`` if
    it still refers to the thread running this loop.
    """
    try:
        while not self._stop_event.is_set():
            # Take a snapshot of the instance under the lock; the lock is
            # released again before the (blocking) handle call.
            with self._l_lock:
                lcm = self.l
                if lcm is None:
                    break
            try:
                # This doesn't have to be under a lock because the C
                # library has its own locking for this.
                lcm.handle_timeout(_LCM_LOOP_TIMEOUT)
            except Exception as e:
                stack_trace = traceback.format_exc()
                print(f"Error in LCM handling: {e}\n{stack_trace}")
    finally:
        self._cleanup_owned_lcm()
        with self._l_lock:
            # Only clear the slot if it still points at this thread, so a
            # different thread object stored there is never overwritten.
            if self._thread is threading.current_thread():
                self._thread = None
131153
132154 def stop (self ) -> None :
133155 """Stop the LCM loop."""
134156 self ._stop_event .set ()
135- if self ._thread is not None :
157+ thread = self ._thread
158+ if thread is not None :
136159 # Only join if we're not the LCM thread (avoid "cannot join current thread")
137- if threading .current_thread () != self . _thread :
138- self . _thread .join (timeout = DEFAULT_THREAD_JOIN_TIMEOUT )
139- if self . _thread .is_alive ():
160+ if threading .current_thread () != thread :
161+ thread .join (timeout = DEFAULT_THREAD_JOIN_TIMEOUT )
162+ if thread .is_alive ():
140163 logger .warning ("LCM thread did not stop cleanly within timeout" )
141164
142- # Clean up LCM instance if we created it
143- if not self . config . lcm :
144- with self . _l_lock :
145- if self . l is not None :
146- del self . l
147- self .l = None
165+ # If the thread is still alive, do not clean up now. _lcm_loop will
166+ # clean up when it exits. If we try to clean up here as well it could
167+ # race with the cleanup in _lcm_loop and segfault, and it would leave
168+ # the service half-stopped with a live thread but no LCM instance.
169+ if thread is None or not thread . is_alive ():
170+ self ._cleanup_owned_lcm ()
148171
149172 with self ._call_thread_pool_lock :
150173 if self ._call_thread_pool :
0 commit comments