Skip to content

Commit 33774dd

Browse files
committed
cleanup
1 parent c1370b9 commit 33774dd

16 files changed

Lines changed: 1893 additions & 1353 deletions

File tree

py-src/data_formulator/agents/agent_data_rec.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@
7171
|---|---|---|
7272
| Scatter Plot | x, y, color, size, facet | opacity (0.1–1.0) |
7373
| Regression | x, y, color, size, facet | regressionMethod ("linear","log","exp","pow","quad","poly"), polyOrder (2–10) |
74-
| Bar Chart | x, y, color, facet | cornerRadius (0–15) |
75-
| Grouped Bar Chart | x, y, group, facet | cornerRadius (0–15) |
74+
| Bar Chart | x, y, color, facet | |
75+
| Grouped Bar Chart | x, y, group, facet | |
7676
| Line Chart | x, y, color, strokeDash, facet | interpolate ("linear","monotone","step") |
7777
| Area Chart | x, y, color, facet | — |
7878
| Heatmap | x, y, color, facet | colorScheme ("viridis","blues","reds","oranges","greens","blueorange","redblue") |

py-src/data_formulator/datalake/file_manager.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from pathlib import Path
1616
from typing import BinaryIO, Union
1717

18+
from werkzeug.utils import secure_filename
19+
1820
from data_formulator.datalake.metadata import TableMetadata
1921
from data_formulator.datalake.workspace import Workspace
2022

@@ -158,6 +160,11 @@ def save_uploaded_file(
158160
Raises:
159161
ValueError: If file type is not supported
160162
"""
163+
# Sanitize filename to prevent path traversal (defence-in-depth)
164+
filename = secure_filename(filename)
165+
if not filename:
166+
raise ValueError("Invalid filename after sanitization")
167+
161168
# Validate file type
162169
file_type = get_file_type(filename)
163170
if file_type is None:

py-src/data_formulator/tables_routes.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from flask import request, jsonify, Blueprint, Response
1313
import pandas as pd
1414
from pathlib import Path
15+
from werkzeug.utils import secure_filename
1516

1617
from data_formulator.data_loader import DATA_LOADERS
1718
from data_formulator.auth import get_identity_id
@@ -418,10 +419,13 @@ def create_table():
418419
file = request.files['file']
419420
if not file.filename or not is_supported_file(file.filename):
420421
return jsonify({"status": "error", "message": "Unsupported file format"}), 400
422+
safe_name = secure_filename(file.filename)
423+
if not safe_name:
424+
return jsonify({"status": "error", "message": "Invalid filename"}), 400
421425
meta = save_uploaded_file(
422426
workspace,
423427
file.stream,
424-
file.filename,
428+
safe_name,
425429
table_name=sanitized_table_name,
426430
overwrite=False,
427431
)

src/app/App.tsx

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,7 @@ export const AppFC: FC<AppFCProps> = function AppFC(appProps) {
871871

872872
const dispatch = useDispatch<AppDispatch>();
873873
const viewMode = useSelector((state: DataFormulatorState) => state.viewMode);
874+
const tables = useSelector((state: DataFormulatorState) => state.tables);
874875
const generatedReports = useSelector((state: DataFormulatorState) => state.generatedReports);
875876
const focusedId = useSelector((state: DataFormulatorState) => state.focusedId);
876877
const serverConfig = useSelector((state: DataFormulatorState) => state.serverConfig);
@@ -1002,14 +1003,13 @@ export const AppFC: FC<AppFCProps> = function AppFC(appProps) {
10021003
});
10031004

10041005
// Check if we're on the about page
1005-
const isAboutPage = (window.location.pathname === '/about'
1006-
|| (window.location.pathname === '/' && serverConfig.PROJECT_FRONT_PAGE));
1006+
const isAboutPage = window.location.pathname === '/about';
10071007
const isGalleryPage = window.location.pathname === '/gallery';
10081008
const isAppPage = !isAboutPage && !isGalleryPage;
10091009

10101010
let appBar = [
10111011
<AppBar position="static" key="app-bar-main" >
1012-
<Toolbar variant="dense" sx={{height: 40, minHeight: 36}}>
1012+
<Toolbar variant="dense" sx={{height: 40, minHeight: 36, position: 'relative'}}>
10131013
<Button sx={{
10141014
display: "flex", flexDirection: "row", textTransform: "none",
10151015
alignItems: 'stretch',
@@ -1021,7 +1021,7 @@ export const AppFC: FC<AppFCProps> = function AppFC(appProps) {
10211021
<Box component="img" sx={{ height: 20, mr: 0.5 }} alt="" src={dfLogo} />
10221022
<Typography noWrap component="h1" sx={{ fontWeight: 300, display: { xs: 'none', sm: 'block' }, letterSpacing: '0.03em' }}>
10231023
{toolName}
1024-
</Typography>
1024+
</Typography>
10251025
</Button>
10261026
<Box
10271027
sx={{
@@ -1101,6 +1101,11 @@ export const AppFC: FC<AppFCProps> = function AppFC(appProps) {
11011101
Gallery
11021102
</Button>
11031103
</Box>
1104+
{tables.length === 0 && (
1105+
<Typography noWrap sx={{ position: 'absolute', left: '50%', transform: 'translateX(-50%)', fontWeight: 500, fontSize: '0.65rem', color: 'text.disabled', letterSpacing: '0.15em', textTransform: 'uppercase' }}>
1106+
Microsoft Research
1107+
</Typography>
1108+
)}
11041109
{isAppPage && (
11051110
<Box sx={{ display: 'flex', ml: 'auto', fontSize: 14 }}>
11061111
{focusedId !== undefined && <React.Fragment><ToggleButtonGroup
@@ -1150,7 +1155,7 @@ export const AppFC: FC<AppFCProps> = function AppFC(appProps) {
11501155
<Typography fontSize="inherit" sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
11511156
<SessionMenu />
11521157
</Typography>
1153-
<ResetDialog />
1158+
{tables.length > 0 && <ResetDialog />}
11541159
</Box>
11551160
)}
11561161
{isAboutPage && (
@@ -1259,7 +1264,7 @@ export const AppFC: FC<AppFCProps> = function AppFC(appProps) {
12591264
},
12601265
{
12611266
path: "/",
1262-
element: serverConfig.PROJECT_FRONT_PAGE ? <About /> : <DataFormulatorFC />,
1267+
element: <DataFormulatorFC />,
12631268
}, {
12641269
path: "*",
12651270
element: <DataFormulatorFC />,

src/app/dfSlice.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1328,6 +1328,7 @@ export const selectRefreshConfigs = createSelector(
13281328
canRefresh: t.source?.canRefresh ?? false,
13291329
url: t.source?.url,
13301330
hasVirtual: !!t.virtual?.tableId,
1331+
hasDerive: !!t.derive,
13311332
})),
13321333
{
13331334
memoizeOptions: {
@@ -1338,7 +1339,8 @@ export const selectRefreshConfigs = createSelector(
13381339
if (a.id !== b.id || a.autoRefresh !== b.autoRefresh ||
13391340
a.refreshIntervalSeconds !== b.refreshIntervalSeconds ||
13401341
a.sourceType !== b.sourceType || a.canRefresh !== b.canRefresh ||
1341-
a.url !== b.url || a.hasVirtual !== b.hasVirtual) return false;
1342+
a.url !== b.url || a.hasVirtual !== b.hasVirtual ||
1343+
a.hasDerive !== b.hasDerive) return false;
13421344
}
13431345
return true;
13441346
}

src/app/useDataRefresh.tsx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,12 @@ export function useDataRefresh() {
340340

341341
// Set up new refresh schedules
342342
refreshConfigs.forEach((config) => {
343+
// Skip derived tables — they are refreshed by useDerivedTableRefresh
344+
// when their source tables change, not by polling an external source.
343345
const shouldAutoRefresh = config.autoRefresh &&
344346
config.refreshIntervalSeconds &&
345-
config.refreshIntervalSeconds > 0;
347+
config.refreshIntervalSeconds > 0 &&
348+
!config.hasDerive;
346349

347350
if (shouldAutoRefresh) {
348351
const intervalMs = config.refreshIntervalSeconds! * 1000;

src/lib/agents-chart/core/compute-layout.ts

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -691,11 +691,16 @@ export function computeChannelBudgets(
691691
let maxYToKeep = Math.floor(maxSubplotH / yMinGroupStep);
692692

693693
// --- 5. Faceted-chart canvas cap ---
694-
// Each subplot's step-based width/height must fit in the (un-stretched)
695-
// canvas dimensions. Without this, a busy discrete axis produces
696-
// subplots wider than the canvas, causing clipping. Filtering values
697-
// yields narrower subplots; we then re-derive the facet grid so more
698-
// columns fit, reducing overall chart height.
694+
// When a busy discrete axis makes each subplot wider than the
695+
// un-stretched canvas, cap axis items to fit within one canvas
696+
// width/height. This lets subplots be narrower, potentially fitting
697+
// more facet columns — reducing overall chart height.
698+
//
699+
// Example: 70 counties on X × 20 states on column. Without the cap,
700+
// minSubplotWidth = 70 × 6 = 420 → only 1 facet column fits → each
701+
// state stacks vertically → excessively tall chart. With the cap,
702+
// X is truncated to floor(400/6) = 66 items, and the facet grid is
703+
// re-derived with narrower subplots so more columns fit.
699704
if (facetGrid) {
700705
const canvasXCap = Math.max(1, Math.floor(canvasSize.width / xMinGroupStep));
701706
const canvasYCap = Math.max(1, Math.floor(canvasSize.height / yMinGroupStep));
@@ -704,17 +709,24 @@ export function computeChannelBudgets(
704709
maxXToKeep = Math.min(maxXToKeep, canvasXCap);
705710
maxYToKeep = Math.min(maxYToKeep, canvasYCap);
706711

707-
// Re-derive facet grid with the tighter subplot size.
712+
// With tighter axis items, subplots can be narrower, so more
713+
// facet columns may fit. Re-derive the grid for column-only
714+
// wrapping (the most affected case).
708715
const colField = channelSemantics.column?.field;
709716
const rowField = channelSemantics.row?.field;
710717
const colCount = colField
711718
? new Set(data.map(r => r[colField])).size : 0;
712719

713720
if (colCount > 1 && !rowField) {
714-
const tighterW = canvasXCap * xMinGroupStep;
721+
const tighterW = Math.max(
722+
options.minSubplotSize ?? 60,
723+
maxXToKeep * xMinGroupStep,
724+
);
715725
const totalW = canvasSize.width * maxStretchVal - fixW;
716726
const totalH = canvasSize.height * maxStretchVal - fixH;
717-
const revisedMaxCols = Math.max(1, Math.floor(totalW / (tighterW + gap)));
727+
const revisedMaxCols = Math.max(1, Math.floor(
728+
totalW / (tighterW + gap),
729+
));
718730
const revisedMaxRows = Math.max(1, Math.floor(
719731
totalH / ((options.minSubplotSize ?? 60) + gap),
720732
));
@@ -730,18 +742,14 @@ export function computeChannelBudgets(
730742
}
731743
}
732744

733-
const hasRow = !!channelSemantics.row?.field;
734-
const maxFacetColumns = facetGrid?.maxColumnValues ?? Infinity;
735-
const maxFacetRows = facetGrid?.maxRowValues ?? Infinity;
736-
const maxFacetTotal = facetGrid
737-
? maxFacetColumns * (facetGrid.maxRowValues ?? 1)
738-
: Infinity;
739-
745+
// maxColumnValues already carries the correct semantics for both
746+
// column+row (per-dimension cap) and column-only wrapping (total
747+
// panel count = grid cols × grid rows). No multiplication needed.
740748
const maxValues: Record<string, number> = {
741749
x: maxXToKeep,
742750
y: maxYToKeep,
743-
column: hasRow ? maxFacetColumns : maxFacetTotal,
744-
row: maxFacetRows,
751+
column: facetGrid?.maxColumnValues ?? Infinity,
752+
row: facetGrid?.maxRowValues ?? Infinity,
745753
color: maxColorVal,
746754
};
747755

src/lib/agents-chart/core/field-semantics.ts

Lines changed: 100 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -628,14 +628,95 @@ function mergeIntrinsicWithData(
628628
}
629629

630630
/**
631-
* Resolve domain constraints from annotation, type-intrinsic rules, or data.
631+
* Snap-to-bound heuristic for bounded types like Percentage / PercentageChange.
632+
*
633+
* Each bound is snapped independently:
634+
* - If data approaches the intrinsic lower bound → snap min
635+
* - If data approaches the intrinsic upper bound → snap max
636+
* - If data exceeds a bound → don't snap that side (let VL auto-extend)
637+
*
638+
* Threshold: 25% of the *effective side range*.
639+
*
640+
* We err on the side of snapping, because:
641+
* - Semantic types are opt-in — the bound carries meaning by definition.
642+
* - A wrong snap (extra white space) is less harmful than a wrong
643+
* no-snap (viewer loses semantic reference, differences are
644+
* exaggerated and proximity to the bound is hidden).
645+
* - Only when data is clearly in the interior (> 25% away from each
646+
* bound) does the bound stop being a useful reference.
632647
*
633-
* The effective domain is always the **union** of the intrinsic (semantic)
634-
* domain and the actual data range, so that data points outside the
635-
* type's natural bounds (e.g., Percentage > 100 %) are never clipped.
648+
* When the intrinsic domain straddles zero (lo < 0 < hi), zero acts as a
649+
* visual baseline (bar charts, contextual zero). Each bound's threshold
650+
* is computed relative to its distance from zero — not the full range —
651+
* so that snapping one side doesn't make values on the other side of zero
652+
* invisible (e.g., snapping to -100% when data has a tiny +0.2% bar).
653+
*
654+
* When the domain doesn't straddle zero (e.g., [0, 100]), the full range
655+
* is used as the reference.
656+
*
657+
* Examples for Percentage [0, 100] (threshold = 25, full range):
658+
* 20–45% → snap min=0 only (20 within 25 of 0; 45 far from 100)
659+
* 35–65% → no snap (both far from edges, in interior)
660+
* 55–82% → snap max=100 only (82 within 25 of 100; 55 far from 0)
661+
* 15–80% → snap both [0, 100] (15 near 0, 80 near 100)
662+
* 30–130% → no snap (130 exceeds 100 → no snap; 30 far from 0)
663+
*
664+
* Examples for PercentageChange [-1, 1] (threshold = 0.25 per side):
665+
* -0.03 to +0.05 → no snap (both far from ±0.75)
666+
* -0.70 to +0.30 → no snap (-0.70 > -0.75, not close enough)
667+
* -0.80 to +0.30 → snap min=-1 (-0.80 ≤ -0.75; +0.30 < 0.75)
668+
* -0.80 to +0.78 → snap both (both within 0.25 of edges)
669+
*/
670+
export function snapToBoundHeuristic(
    intrinsic: [number, number],
    values: any[],
): DomainConstraint | undefined {
    // intrinsic: [lo, hi] theoretical bounds of the semantic type.
    // values:    raw column values; anything that is not a finite-or-infinite
    //            number (strings, null, NaN, ...) is ignored.
    // Returns a non-clamping constraint with `min` and/or `max` set to the
    // snapped bound, or undefined when there is no numeric data, the
    // intrinsic range is empty, or neither side qualifies for snapping.
    const [lo, hi] = intrinsic;
    const range = hi - lo;
    if (range <= 0) return undefined;

    // Single-pass extent scan. Deliberately NOT `Math.min(...nums)`:
    // spreading a large column as call arguments exceeds the engine's
    // argument limit and throws RangeError on big datasets.
    let dataMin = Infinity;
    let dataMax = -Infinity;
    let hasNumeric = false;
    for (const v of values) {
        if (typeof v !== 'number' || isNaN(v)) continue;
        hasNumeric = true;
        if (v < dataMin) dataMin = v;
        if (v > dataMax) dataMax = v;
    }
    if (!hasNumeric) return undefined;

    // When the domain straddles zero, compute each side's threshold relative
    // to its distance from zero. This prevents snapping one side from
    // stretching the axis so wide that values near zero on the other side
    // become invisible (sub-pixel bars). Otherwise the full range is the
    // reference for both sides.
    const zeroInside = lo < 0 && hi > 0;
    const thresholdLo = 0.25 * (zeroInside ? (0 - lo) : range);
    const thresholdHi = 0.25 * (zeroInside ? hi : range);

    // Snap lower bound: data min is within the threshold of the intrinsic
    // lower bound AND does not go below it (if it does, leave the side
    // unconstrained so the axis can extend to the data).
    const snapMin = (dataMin >= lo && dataMin <= lo + thresholdLo) ? lo : undefined;

    // Snap upper bound: data max is within the threshold of the intrinsic
    // upper bound AND does not exceed it.
    const snapMax = (dataMax <= hi && dataMax >= hi - thresholdHi) ? hi : undefined;

    if (snapMin === undefined && snapMax === undefined) return undefined;

    return { min: snapMin, max: snapMax, clamp: false };
}
711+
712+
/**
713+
* Resolve domain constraints from annotation, type-intrinsic rules, or data.
636714
*
637715
* Only truly fixed physical domains (Latitude, Longitude, Correlation)
638-
* use hard clamping.
716+
* use hard clamping. Bounded types like Percentage use a snap-to-bound
717+
* heuristic: the axis extends to the theoretical endpoint (e.g., 100%)
718+
* only when data is close to it, avoiding wasted space when data is
719+
* concentrated in a small region.
639720
*
640721
* Priority: annotation.intrinsicDomain > type-intrinsic > data-inferred
641722
*/
@@ -644,8 +725,18 @@ export function resolveDomainConstraint(
644725
annotation: SemanticAnnotation,
645726
values: any[],
646727
): DomainConstraint | undefined {
647-
// 1. Explicit annotation intrinsicDomain — soft merge with data
728+
const entry = getRegistryEntry(semanticType);
729+
730+
// 1. Explicit annotation intrinsicDomain
648731
if (annotation.intrinsicDomain) {
732+
// Proportion (Percentage) and SignedMeasure (PercentageChange, Profit):
733+
// use snap-to-bound heuristic on both ends independently.
734+
// Don't force the full theoretical range — only snap to a bound
735+
// when data approaches it (e.g., 97% → snap to 100%, -0.95 → snap to -1).
736+
if (entry.t1 === 'Proportion' || entry.t1 === 'SignedMeasure') {
737+
return snapToBoundHeuristic(annotation.intrinsicDomain, values);
738+
}
739+
// All other types: soft merge (union of intrinsic + data)
649740
return mergeIntrinsicWithData(annotation.intrinsicDomain, values, false);
650741
}
651742

@@ -654,14 +745,13 @@ export function resolveDomainConstraint(
654745
if (semanticType === 'Longitude') return mergeIntrinsicWithData([-180, 180], values, true);
655746
if (semanticType === 'Correlation') return mergeIntrinsicWithData([-1, 1], values, true);
656747

657-
// 3. Data-inferred intrinsic domain for Percentage / Rate
658-
// Detect scale (0–1 fractional vs 0–100 whole-number) then soft-merge.
748+
// 3. Percentage without explicit annotation — detect scale and apply snap
659749
if (semanticType === 'Percentage') {
660750
const nums = values.filter((v: any) => typeof v === 'number' && !isNaN(v));
661751
if (nums.length > 0) {
662752
const rep = detectPercentageRepresentation(nums);
663-
const intrinsicMax = rep === '0-1' ? 1 : 100;
664-
return mergeIntrinsicWithData([0, intrinsicMax], nums, false);
753+
const M = rep === '0-1' ? 1 : 100;
754+
return snapToBoundHeuristic([0, M], values);
665755
}
666756
}
667757

src/lib/agents-chart/core/type-registry.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ const TYPE_REGISTRY: Record<string, TypeRegistryEntry> = {
115115

116116
// --- Measure: SignedMeasure ---
117117
Profit: { t0: 'Measure', t1: 'SignedMeasure', visEncodings: ['quantitative'], aggRole: 'signed-additive', domainShape: 'open', diverging: 'conditional', formatClass: 'signed-currency', zeroBaseline: 'meaningful', zeroPad: 0 },
118-
PercentageChange: { t0: 'Measure', t1: 'SignedMeasure', visEncodings: ['quantitative'], aggRole: 'intensive', domainShape: 'open', diverging: 'conditional', formatClass: 'signed-percent', zeroBaseline: 'meaningful', zeroPad: 0 },
118+
PercentageChange: { t0: 'Measure', t1: 'SignedMeasure', visEncodings: ['quantitative'], aggRole: 'intensive', domainShape: 'open', diverging: 'conditional', formatClass: 'signed-percent', zeroBaseline: 'contextual', zeroPad: 0.05 },
119119
Sentiment: { t0: 'Measure', t1: 'SignedMeasure', visEncodings: ['quantitative'], aggRole: 'intensive', domainShape: 'open', diverging: 'inherent', formatClass: 'signed-decimal', zeroBaseline: 'meaningful', zeroPad: 0 },
120120
Correlation: { t0: 'Measure', t1: 'SignedMeasure', visEncodings: ['quantitative'], aggRole: 'intensive', domainShape: 'bounded', diverging: 'inherent', formatClass: 'signed-decimal', zeroBaseline: 'meaningful', zeroPad: 0 },
121121

0 commit comments

Comments
 (0)