microsoft
diff --git a/package.json b/package.json
Lines changed: 1 addition & 0 deletions
diff --git a/py-src/data_formulator/agent_routes.py b/py-src/data_formulator/agent_routes.py
Lines changed: 37 additions & 37 deletions
diff --git a/py-src/data_formulator/agents/agent_chart_insight.py b/py-src/data_formulator/agents/agent_chart_insight.py
Lines changed: 7 additions & 4 deletions
diff --git a/py-src/data_formulator/agents/agent_code_explanation.py b/py-src/data_formulator/agents/agent_code_explanation.py
Lines changed: 6 additions & 3 deletions
diff --git a/py-src/data_formulator/agents/agent_data_clean_stream.py b/py-src/data_formulator/agents/agent_data_clean_stream.py
Lines changed: 5 additions & 3 deletions
diff --git a/py-src/data_formulator/agents/agent_data_load.py b/py-src/data_formulator/agents/agent_data_load.py
Lines changed: 7 additions & 4 deletions
@@ -14,6 +14,7 @@
         "@types/dompurify": "^3.0.5",
         "@types/validator": "^13.12.2",
         "allotment": "^1.20.4",
+        "canvas": "^3.2.1",
         "chart.js": "^4.5.1",
         "d3": "^7.3.0",
         "dompurify": "^3.2.4",
 
@@ -168,12 +168,12 @@ def sanitize_model_error(error_message: str) -> str:
 @agent_bp.route('/test-model', methods=['GET', 'POST'])
 def test_model():
     if request.is_json:
-        logger.info("# code query: ")
+        logger.info("# test-model request")
         content = request.get_json()
 
         # contains endpoint, key, model, api_base, api_version
-        logger.info("content------------------------------")
-        logger.info(content)
+        logger.debug("content------------------------------")
+        logger.debug(content)
 
         client = get_client(content['model'])
 
@@ -185,8 +185,8 @@ def test_model():
                 ]
             )
 
-            logger.info(f"model: {content['model']}")
-            logger.info(f"welcome message: {response.choices[0].message.content}")
+            logger.debug(f"model: {content['model']}")
+            logger.debug(f"welcome message: {response.choices[0].message.content}")
 
             if "I can hear you." in response.choices[0].message.content:
                 result = {
@@ -211,14 +211,14 @@ def test_model():
 def process_data_on_load_request():
 
     if request.is_json:
-        logger.info("# process data query: ")
+        logger.info("# process-data-on-load request")
         content = request.get_json()
         token = content["token"]
         input_data = content["input_data"]
 
         client = get_client(content['model'])
 
-        logger.info(f" model: {content['model']}")
+        logger.debug(f" model: {content['model']}")
 
         try:
             # Get workspace (needed for both virtual and in-memory tables)
@@ -249,13 +249,13 @@ def process_data_on_load_request():
 def clean_data_stream_request():
     def generate():
         if request.is_json:
-            logger.info("# data clean stream request")
+            logger.info("# clean-data-stream request")
             content = request.get_json()
             token = content["token"]
 
             client = get_client(content['model'])
 
-            logger.info(f" model: {content['model']}")
+            logger.debug(f" model: {content['model']}")
 
             agent = DataCleanAgentStream(client=client)
 
@@ -301,7 +301,7 @@ def generate():
 def sort_data_request():
 
     if request.is_json:
-        logger.info("# sort query: ")
+        logger.info("# sort-data request")
         content = request.get_json()
         token = content["token"]
 
@@ -327,7 +327,7 @@ def sort_data_request():
 def derive_data():
 
     if request.is_json:
-        logger.info("# request data: ")
+        logger.info("# derive-data request")
         content = request.get_json()        
         token = content["token"]
 
@@ -348,13 +348,13 @@ def derive_data():
         else:
             prev_messages = []
 
-        logger.info("== input tables ===>")
+        logger.debug("== input tables ===>")
         for table in input_tables:
-            logger.info(f"===> Table: {table['name']} (first 5 rows)")
-            logger.info(table['rows'][:5])
+            logger.debug(f"===> Table: {table['name']} (first 5 rows)")
+            logger.debug(table['rows'][:5])
 
-        logger.info("== user spec ===")
-        logger.info(instruction)
+        logger.debug("== user spec ===")
+        logger.debug(instruction)
 
         # If user provided chart encodings (via visualization context), use transform mode; otherwise recommendation
         mode = "transform" if current_visualization or expected_visualization else "recommendation"
@@ -379,7 +379,7 @@ def derive_data():
                 repair_attempts = 0
                 while results[0]['status'] == 'error' and repair_attempts < max_repair_attempts:
                     error_message = results[0]['content']
-                    logger.info(f"[derive-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}), mode={mode}. Error: {error_message}")
+                    logger.warning(f"[derive-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}), mode={mode}. Error: {error_message}")
                     new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur."
 
                     prev_dialog = results[0]['dialog']
@@ -390,10 +390,10 @@ def derive_data():
                         results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1)
 
                     repair_attempts += 1
-                    logger.info(f"[derive-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0]['status']}")
+                    logger.warning(f"[derive-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0]['status']}")
 
                 if repair_attempts > 0:
-                    logger.info(f"[derive-data] Finished repair loop after {repair_attempts} attempt(s). Final status: {results[0]['status']}")
+                    logger.warning(f"[derive-data] Finished repair loop after {repair_attempts} attempt(s). Final status: {results[0]['status']}")
 
             # Sign code in each result so the frontend can send it back
             # for re-execution during data refresh with proof of authenticity.
@@ -417,7 +417,7 @@ def generate():
         if request.is_json:
             logger.setLevel(logging.INFO)
 
-            logger.info("# explore data request: ")
+            logger.info("# explore-data-streaming request")
             content = request.get_json()        
             token = content["token"]
 
@@ -430,13 +430,13 @@ def generate():
             agent_coding_rules = content.get("agent_coding_rules", "")
             conversation_history = content.get("conversation_history", None)
 
-            logger.info("== input tables ===>")
+            logger.debug("== input tables ===>")
             for table in input_tables:
-                logger.info(f"===> Table: {table['name']} (first 5 rows)")
-                logger.info(table['rows'][:5])
+                logger.debug(f"===> Table: {table['name']} (first 5 rows)")
+                logger.debug(table['rows'][:5])
 
-            logger.info("== exploration question ===")
-            logger.info(initial_plan)
+            logger.debug("== exploration question ===")
+            logger.debug(initial_plan)
 
             # Model config for the exploration flow
             model_config = {
@@ -542,12 +542,12 @@ def generate():
             clarification_response = content.get("clarification_response", None)
             completed_step_count = content.get("completed_step_count", 0)
 
-            logger.info("== input tables ===>")
+            logger.debug("== input tables ===>")
             for table in input_tables:
-                logger.info(f"===> Table: {table['name']} (first 5 rows)")
-                logger.info(table['rows'][:5])
+                logger.debug(f"===> Table: {table['name']} (first 5 rows)")
+                logger.debug(table['rows'][:5])
 
-            logger.info(f"== user question ===> {user_question}")
+            logger.debug(f"== user question ===> {user_question}")
 
             client = get_client(content['model'])
             identity_id = get_identity_id()
@@ -583,7 +583,7 @@ def generate():
                             "role": "user",
                             "content": f"[USER CLARIFICATION]\n\n{clarification_response}",
                         })
-                        logger.info(f"== resuming with clarification ===> {clarification_response}")
+                        logger.debug(f"== resuming with clarification ===> {clarification_response}")
 
                     for event in agent.run(
                         input_tables=input_tables,
@@ -638,7 +638,7 @@ def generate():
 def refine_data():
 
     if request.is_json:
-        logger.info("# request data: ")
+        logger.info("# refine-data request")
         content = request.get_json()        
         token = content["token"]
 
@@ -656,13 +656,13 @@ def refine_data():
         current_visualization = content.get("current_visualization", None)
         expected_visualization = content.get("expected_visualization", None)
 
-        logger.info("== input tables ===>")
+        logger.debug("== input tables ===>")
         for table in input_tables:
-            logger.info(f"===> Table: {table['name']} (first 5 rows)")
-            logger.info(table['rows'][:5])
+            logger.debug(f"===> Table: {table['name']} (first 5 rows)")
+            logger.debug(table['rows'][:5])
 
-        logger.info("== user spec ===>")
-        logger.info(new_instruction)
+        logger.debug("== user spec ===>")
+        logger.debug(new_instruction)
 
         try:
             identity_id = get_identity_id()
@@ -708,7 +708,7 @@ def refine_data():
 @agent_bp.route('/code-expl', methods=['GET', 'POST'])
 def request_code_expl():
     if request.is_json:
-        logger.info("# request data: ")
+        logger.info("# code-expl request")
         content = request.get_json()
         client = get_client(content['model'])
 
 
@@ -35,7 +35,7 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
         
         Args:
             chart_image_base64: Base64-encoded PNG data URL of the chart
-            chart_type: The type of chart (e.g., "bar", "scatter")
+            chart_type: The type of chart (e.g., "Bar Chart", "Scatter Plot")
             field_names: List of field names used in the chart encodings
             input_tables: Optional list of input table dicts for data context
             n: Number of candidates to generate
@@ -74,14 +74,15 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
             {"role": "user", "content": user_content}
         ]
 
-        logger.info(f"ChartInsightAgent: analyzing {chart_type} chart with fields {field_names}")
+        logger.debug(f"ChartInsightAgent: analyzing {chart_type} chart with fields {field_names}")
+        logger.info(f"[ChartInsightAgent] run start | chart_type={chart_type}")
 
         response = self.client.get_completion(messages=messages)
 
         candidates = []
         for choice in response.choices:
-            logger.info("\n=== Chart insight result ===>\n")
-            logger.info(choice.message.content + "\n")
+            logger.debug("\n=== Chart insight result ===>\n")
+            logger.debug(choice.message.content + "\n")
 
             response_content = choice.message.content
             title = ""
@@ -115,4 +116,6 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
 
             candidates.append(result)
 
+        status = candidates[0].get('status', '?') if candidates else 'empty'
+        logger.info(f"[ChartInsightAgent] run done | status={status}")
         return candidates
@@ -150,7 +150,8 @@ def run(self, input_tables, code, n=1):
 
         user_query = f"[CONTEXT]\n\n{data_summary}\n\n[CODE]\n\nhere is the transformation code: {code}\n\n[EXPLANATION]\n"
 
-        logger.info(user_query)
+        logger.debug(user_query)
+        logger.info(f"[CodeExplanationAgent] run start")
 
         messages = [{"role":"system", "content": SYSTEM_PROMPT},
                     {"role":"user","content": user_query}]
@@ -160,8 +161,8 @@ def run(self, input_tables, code, n=1):
         candidates = []
         for choice in response.choices:
 
-            logger.info("\n=== Code explanation result ===>\n")
-            logger.info(choice.message.content + "\n")
+            logger.debug("\n=== Code explanation result ===>\n")
+            logger.debug(choice.message.content + "\n")
 
             # Inline parsing of concepts section
             response_content = choice.message.content
@@ -198,4 +199,6 @@ def run(self, input_tables, code, n=1):
 
             candidates.append(result)
 
+        status = candidates[0].get('status', '?') if candidates else 'empty'
+        logger.info(f"[CodeExplanationAgent] run done | status={status}")
         return candidates
@@ -183,7 +183,8 @@ def stream(self, prompt, artifacts=[], dialog=[]):
             'content': content
         }
 
-        logger.info(user_prompt)
+        logger.debug(user_prompt)
+        logger.info(f"[DataCleanAgent] run start (streaming)")
 
         system_message = {
             'role': 'system',
@@ -211,8 +212,8 @@ def stream(self, prompt, artifacts=[], dialog=[]):
                     yield delta.content
 
         # Parse the final content the same way as the non-streaming version
-        logger.info("\n=== Python Data Clean Agent Stream ===>\n")
-        logger.info(accumulated_content + "\n")
+        logger.debug("\n=== Python Data Clean Agent Stream ===>\n")
+        logger.debug(accumulated_content + "\n")
 
         # Parse table sections from the accumulated content
         tables = parse_table_sections(accumulated_content)
@@ -228,6 +229,7 @@ def stream(self, prompt, artifacts=[], dialog=[]):
 
         result['dialog'] = [*messages, {"role": "assistant", "content": accumulated_content}]
         result['agent'] = 'DataCleanAgentStream'
+        logger.info(f"[DataCleanAgent] run done | status={result.get('status', '?')}")
 
         # add a newline to the beginning of the result to separate it from the previous result     
         yield '\n' + json.dumps(result) + '\n'
@@ -175,7 +175,8 @@ def run(self, input_data, n=1):
 
         user_query = f"[DATA]\n\n{data_summary}\n\n[OUTPUT]"
 
-        logger.info(user_query)
+        logger.debug(user_query)
+        logger.info(f"[DataLoadAgent] run start")
 
         messages = [{"role":"system", "content": SYSTEM_PROMPT},
                     {"role":"user","content": user_query}]
@@ -185,11 +186,11 @@ def run(self, input_data, n=1):
         candidates = []
         for choice in response.choices:
 
-            logger.info("\n=== Data load result ===>\n")
-            logger.info(choice.message.content + "\n")
+            logger.debug("\n=== Data load result ===>\n")
+            logger.debug(choice.message.content + "\n")
 
             json_blocks = extract_json_objects(choice.message.content + "\n")
-            logger.info(json_blocks)
+            logger.debug(json_blocks)
 
             if len(json_blocks) > 0:
                 result = {'status': 'ok', 'content': json_blocks[0]}
@@ -206,4 +207,6 @@ def run(self, input_data, n=1):
 
             candidates.append(result)
 
+        status = candidates[0].get('status', '?') if candidates else 'empty'
+        logger.info(f"[DataLoadAgent] run done | status={status}")
         return candidates