Skip to content

Commit 1207980

Browse files
committed
updates to both frontend and backend
1 parent e122c95 commit 1207980

38 files changed

Lines changed: 2617 additions & 951 deletions

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
"@types/dompurify": "^3.0.5",
1515
"@types/validator": "^13.12.2",
1616
"allotment": "^1.20.4",
17+
"canvas": "^3.2.1",
1718
"chart.js": "^4.5.1",
1819
"d3": "^7.3.0",
1920
"dompurify": "^3.2.4",

py-src/data_formulator/agent_routes.py

Lines changed: 37 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -168,12 +168,12 @@ def sanitize_model_error(error_message: str) -> str:
168168
@agent_bp.route('/test-model', methods=['GET', 'POST'])
169169
def test_model():
170170
if request.is_json:
171-
logger.info("# code query: ")
171+
logger.info("# test-model request")
172172
content = request.get_json()
173173

174174
# contains endpoint, key, model, api_base, api_version
175-
logger.info("content------------------------------")
176-
logger.info(content)
175+
logger.debug("content------------------------------")
176+
logger.debug(content)
177177

178178
client = get_client(content['model'])
179179

@@ -185,8 +185,8 @@ def test_model():
185185
]
186186
)
187187

188-
logger.info(f"model: {content['model']}")
189-
logger.info(f"welcome message: {response.choices[0].message.content}")
188+
logger.debug(f"model: {content['model']}")
189+
logger.debug(f"welcome message: {response.choices[0].message.content}")
190190

191191
if "I can hear you." in response.choices[0].message.content:
192192
result = {
@@ -211,14 +211,14 @@ def test_model():
211211
def process_data_on_load_request():
212212

213213
if request.is_json:
214-
logger.info("# process data query: ")
214+
logger.info("# process-data-on-load request")
215215
content = request.get_json()
216216
token = content["token"]
217217
input_data = content["input_data"]
218218

219219
client = get_client(content['model'])
220220

221-
logger.info(f" model: {content['model']}")
221+
logger.debug(f" model: {content['model']}")
222222

223223
try:
224224
# Get workspace (needed for both virtual and in-memory tables)
@@ -249,13 +249,13 @@ def process_data_on_load_request():
249249
def clean_data_stream_request():
250250
def generate():
251251
if request.is_json:
252-
logger.info("# data clean stream request")
252+
logger.info("# clean-data-stream request")
253253
content = request.get_json()
254254
token = content["token"]
255255

256256
client = get_client(content['model'])
257257

258-
logger.info(f" model: {content['model']}")
258+
logger.debug(f" model: {content['model']}")
259259

260260
agent = DataCleanAgentStream(client=client)
261261

@@ -301,7 +301,7 @@ def generate():
301301
def sort_data_request():
302302

303303
if request.is_json:
304-
logger.info("# sort query: ")
304+
logger.info("# sort-data request")
305305
content = request.get_json()
306306
token = content["token"]
307307

@@ -327,7 +327,7 @@ def sort_data_request():
327327
def derive_data():
328328

329329
if request.is_json:
330-
logger.info("# request data: ")
330+
logger.info("# derive-data request")
331331
content = request.get_json()
332332
token = content["token"]
333333

@@ -348,13 +348,13 @@ def derive_data():
348348
else:
349349
prev_messages = []
350350

351-
logger.info("== input tables ===>")
351+
logger.debug("== input tables ===>")
352352
for table in input_tables:
353-
logger.info(f"===> Table: {table['name']} (first 5 rows)")
354-
logger.info(table['rows'][:5])
353+
logger.debug(f"===> Table: {table['name']} (first 5 rows)")
354+
logger.debug(table['rows'][:5])
355355

356-
logger.info("== user spec ===")
357-
logger.info(instruction)
356+
logger.debug("== user spec ===")
357+
logger.debug(instruction)
358358

359359
# If user provided chart encodings (via visualization context), use transform mode; otherwise recommendation
360360
mode = "transform" if current_visualization or expected_visualization else "recommendation"
@@ -379,7 +379,7 @@ def derive_data():
379379
repair_attempts = 0
380380
while results[0]['status'] == 'error' and repair_attempts < max_repair_attempts:
381381
error_message = results[0]['content']
382-
logger.info(f"[derive-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}), mode={mode}. Error: {error_message}")
382+
logger.warning(f"[derive-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}), mode={mode}. Error: {error_message}")
383383
new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur."
384384

385385
prev_dialog = results[0]['dialog']
@@ -390,10 +390,10 @@ def derive_data():
390390
results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1)
391391

392392
repair_attempts += 1
393-
logger.info(f"[derive-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0]['status']}")
393+
logger.warning(f"[derive-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0]['status']}")
394394

395395
if repair_attempts > 0:
396-
logger.info(f"[derive-data] Finished repair loop after {repair_attempts} attempt(s). Final status: {results[0]['status']}")
396+
logger.warning(f"[derive-data] Finished repair loop after {repair_attempts} attempt(s). Final status: {results[0]['status']}")
397397

398398
# Sign code in each result so the frontend can send it back
399399
# for re-execution during data refresh with proof of authenticity.
@@ -417,7 +417,7 @@ def generate():
417417
if request.is_json:
418418
logger.setLevel(logging.INFO)
419419

420-
logger.info("# explore data request: ")
420+
logger.info("# explore-data-streaming request")
421421
content = request.get_json()
422422
token = content["token"]
423423

@@ -430,13 +430,13 @@ def generate():
430430
agent_coding_rules = content.get("agent_coding_rules", "")
431431
conversation_history = content.get("conversation_history", None)
432432

433-
logger.info("== input tables ===>")
433+
logger.debug("== input tables ===>")
434434
for table in input_tables:
435-
logger.info(f"===> Table: {table['name']} (first 5 rows)")
436-
logger.info(table['rows'][:5])
435+
logger.debug(f"===> Table: {table['name']} (first 5 rows)")
436+
logger.debug(table['rows'][:5])
437437

438-
logger.info("== exploration question ===")
439-
logger.info(initial_plan)
438+
logger.debug("== exploration question ===")
439+
logger.debug(initial_plan)
440440

441441
# Model config for the exploration flow
442442
model_config = {
@@ -542,12 +542,12 @@ def generate():
542542
clarification_response = content.get("clarification_response", None)
543543
completed_step_count = content.get("completed_step_count", 0)
544544

545-
logger.info("== input tables ===>")
545+
logger.debug("== input tables ===>")
546546
for table in input_tables:
547-
logger.info(f"===> Table: {table['name']} (first 5 rows)")
548-
logger.info(table['rows'][:5])
547+
logger.debug(f"===> Table: {table['name']} (first 5 rows)")
548+
logger.debug(table['rows'][:5])
549549

550-
logger.info(f"== user question ===> {user_question}")
550+
logger.debug(f"== user question ===> {user_question}")
551551

552552
client = get_client(content['model'])
553553
identity_id = get_identity_id()
@@ -583,7 +583,7 @@ def generate():
583583
"role": "user",
584584
"content": f"[USER CLARIFICATION]\n\n{clarification_response}",
585585
})
586-
logger.info(f"== resuming with clarification ===> {clarification_response}")
586+
logger.debug(f"== resuming with clarification ===> {clarification_response}")
587587

588588
for event in agent.run(
589589
input_tables=input_tables,
@@ -638,7 +638,7 @@ def generate():
638638
def refine_data():
639639

640640
if request.is_json:
641-
logger.info("# request data: ")
641+
logger.info("# refine-data request")
642642
content = request.get_json()
643643
token = content["token"]
644644

@@ -656,13 +656,13 @@ def refine_data():
656656
current_visualization = content.get("current_visualization", None)
657657
expected_visualization = content.get("expected_visualization", None)
658658

659-
logger.info("== input tables ===>")
659+
logger.debug("== input tables ===>")
660660
for table in input_tables:
661-
logger.info(f"===> Table: {table['name']} (first 5 rows)")
662-
logger.info(table['rows'][:5])
661+
logger.debug(f"===> Table: {table['name']} (first 5 rows)")
662+
logger.debug(table['rows'][:5])
663663

664-
logger.info("== user spec ===>")
665-
logger.info(new_instruction)
664+
logger.debug("== user spec ===>")
665+
logger.debug(new_instruction)
666666

667667
try:
668668
identity_id = get_identity_id()
@@ -708,7 +708,7 @@ def refine_data():
708708
@agent_bp.route('/code-expl', methods=['GET', 'POST'])
709709
def request_code_expl():
710710
if request.is_json:
711-
logger.info("# request data: ")
711+
logger.info("# code-expl request")
712712
content = request.get_json()
713713
client = get_client(content['model'])
714714

py-src/data_formulator/agents/agent_chart_insight.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
3535
3636
Args:
3737
chart_image_base64: Base64-encoded PNG data URL of the chart
38-
chart_type: The type of chart (e.g., "bar", "scatter")
38+
chart_type: The type of chart (e.g., "Bar Chart", "Scatter Plot")
3939
field_names: List of field names used in the chart encodings
4040
input_tables: Optional list of input table dicts for data context
4141
n: Number of candidates to generate
@@ -74,14 +74,15 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
7474
{"role": "user", "content": user_content}
7575
]
7676

77-
logger.info(f"ChartInsightAgent: analyzing {chart_type} chart with fields {field_names}")
77+
logger.debug(f"ChartInsightAgent: analyzing {chart_type} chart with fields {field_names}")
78+
logger.info(f"[ChartInsightAgent] run start | chart_type={chart_type}")
7879

7980
response = self.client.get_completion(messages=messages)
8081

8182
candidates = []
8283
for choice in response.choices:
83-
logger.info("\n=== Chart insight result ===>\n")
84-
logger.info(choice.message.content + "\n")
84+
logger.debug("\n=== Chart insight result ===>\n")
85+
logger.debug(choice.message.content + "\n")
8586

8687
response_content = choice.message.content
8788
title = ""
@@ -115,4 +116,6 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
115116

116117
candidates.append(result)
117118

119+
status = candidates[0].get('status', '?') if candidates else 'empty'
120+
logger.info(f"[ChartInsightAgent] run done | status={status}")
118121
return candidates

py-src/data_formulator/agents/agent_code_explanation.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@ def run(self, input_tables, code, n=1):
150150

151151
user_query = f"[CONTEXT]\n\n{data_summary}\n\n[CODE]\n\nhere is the transformation code: {code}\n\n[EXPLANATION]\n"
152152

153-
logger.info(user_query)
153+
logger.debug(user_query)
154+
logger.info(f"[CodeExplanationAgent] run start")
154155

155156
messages = [{"role":"system", "content": SYSTEM_PROMPT},
156157
{"role":"user","content": user_query}]
@@ -160,8 +161,8 @@ def run(self, input_tables, code, n=1):
160161
candidates = []
161162
for choice in response.choices:
162163

163-
logger.info("\n=== Code explanation result ===>\n")
164-
logger.info(choice.message.content + "\n")
164+
logger.debug("\n=== Code explanation result ===>\n")
165+
logger.debug(choice.message.content + "\n")
165166

166167
# Inline parsing of concepts section
167168
response_content = choice.message.content
@@ -198,4 +199,6 @@ def run(self, input_tables, code, n=1):
198199

199200
candidates.append(result)
200201

202+
status = candidates[0].get('status', '?') if candidates else 'empty'
203+
logger.info(f"[CodeExplanationAgent] run done | status={status}")
201204
return candidates

py-src/data_formulator/agents/agent_data_clean_stream.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,8 @@ def stream(self, prompt, artifacts=[], dialog=[]):
183183
'content': content
184184
}
185185

186-
logger.info(user_prompt)
186+
logger.debug(user_prompt)
187+
logger.info(f"[DataCleanAgent] run start (streaming)")
187188

188189
system_message = {
189190
'role': 'system',
@@ -211,8 +212,8 @@ def stream(self, prompt, artifacts=[], dialog=[]):
211212
yield delta.content
212213

213214
# Parse the final content the same way as the non-streaming version
214-
logger.info("\n=== Python Data Clean Agent Stream ===>\n")
215-
logger.info(accumulated_content + "\n")
215+
logger.debug("\n=== Python Data Clean Agent Stream ===>\n")
216+
logger.debug(accumulated_content + "\n")
216217

217218
# Parse table sections from the accumulated content
218219
tables = parse_table_sections(accumulated_content)
@@ -228,6 +229,7 @@ def stream(self, prompt, artifacts=[], dialog=[]):
228229

229230
result['dialog'] = [*messages, {"role": "assistant", "content": accumulated_content}]
230231
result['agent'] = 'DataCleanAgentStream'
232+
logger.info(f"[DataCleanAgent] run done | status={result.get('status', '?')}")
231233

232234
# add a newline to the beginning of the result to separate it from the previous result
233235
yield '\n' + json.dumps(result) + '\n'

py-src/data_formulator/agents/agent_data_load.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ def run(self, input_data, n=1):
175175

176176
user_query = f"[DATA]\n\n{data_summary}\n\n[OUTPUT]"
177177

178-
logger.info(user_query)
178+
logger.debug(user_query)
179+
logger.info(f"[DataLoadAgent] run start")
179180

180181
messages = [{"role":"system", "content": SYSTEM_PROMPT},
181182
{"role":"user","content": user_query}]
@@ -185,11 +186,11 @@ def run(self, input_data, n=1):
185186
candidates = []
186187
for choice in response.choices:
187188

188-
logger.info("\n=== Data load result ===>\n")
189-
logger.info(choice.message.content + "\n")
189+
logger.debug("\n=== Data load result ===>\n")
190+
logger.debug(choice.message.content + "\n")
190191

191192
json_blocks = extract_json_objects(choice.message.content + "\n")
192-
logger.info(json_blocks)
193+
logger.debug(json_blocks)
193194

194195
if len(json_blocks) > 0:
195196
result = {'status': 'ok', 'content': json_blocks[0]}
@@ -206,4 +207,6 @@ def run(self, input_data, n=1):
206207

207208
candidates.append(result)
208209

210+
status = candidates[0].get('status', '?') if candidates else 'empty'
211+
logger.info(f"[DataLoadAgent] run done | status={status}")
209212
return candidates

0 commit comments

Comments
 (0)