Skip to content

Commit b093c01

Browse files
authored
Merge pull request #44 from microsoft/dev
[deploy] updates to enable providing cleaning instructions
2 parents e26816e + 5e1f848 commit b093c01

8 files changed

Lines changed: 67 additions & 25 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ Play with Data Formulator with one of the following options:
5858

5959
Data Formulator will be automatically opened in the browser at [http://localhost:5000](http://localhost:5000).
6060

61+
*Update: if the default port is occupied, you can specify a different port number (e.g., 8080) by running `python -m data_formulator --port 8080`.*
62+
6163
- **Option 2: Codespaces (5 minutes)**
6264

6365
You can also run Data Formulator in Codespaces; we have everything pre-configured. For more details, see [CODESPACES.md](CODESPACES.md).

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
"@mui/icons-material": "^5.14.0",
1111
"@mui/material": "^5.6.0",
1212
"@reduxjs/toolkit": "^1.8.6",
13+
"@types/dompurify": "^3.0.5",
14+
"@types/validator": "^13.12.2",
1315
"ag-grid-community": "^32.0.2",
1416
"ag-grid-enterprise": "^32.0.2",
1517
"ag-grid-react": "^32.0.2",

py-src/data_formulator/agents/agent_data_clean.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
- the csv table should have the same number of cells for each line, according to the title. If there are some rows with missing values, patch them with empty cells.
4646
- if the raw data has some rows that do not belong to the table, also remove them (e.g., subtitles in between rows)
4747
- if the header row misses some columns, add their corresponding column names. E.g., when the header doesn't have an index column, but every row has an index value, add the missing column header.
48+
* clean up messy column names:
49+
- if the column name contains special characters like "*", "?", "#", "." remove them.
4850
* clean up columns with messy information
4951
- if a column is number but some cells has annotations like "*" "?" or brackets, clean them up.
5052
- if a column is number but has units like ($, %, s), convert them to number (make sure unit conversion is correct when multiple units exist like minute and second) and include unit in the header.
@@ -80,7 +82,7 @@ def __init__(self, client, model):
8082
self.model = model
8183
self.client = client
8284

83-
def run(self, content_type, raw_data):
85+
def run(self, content_type, raw_data, image_cleaning_instruction):
8486
"""derive a new concept based on the raw input data
8587
"""
8688

@@ -93,6 +95,12 @@ def run(self, content_type, raw_data):
9395
}]
9496
}
9597
elif content_type == "image":
98+
# add additional cleaning instructions if provided
99+
if image_cleaning_instruction:
100+
cleaning_prompt = f"\n\n[CLEANING INSTRUCTION]\n\n{image_cleaning_instruction}\n\n"
101+
else:
102+
cleaning_prompt = ""
103+
96104
user_prompt = {
97105
'role': 'user',
98106
'content': [ {
@@ -107,7 +115,7 @@ def run(self, content_type, raw_data):
107115
},
108116
{
109117
'type': 'text',
110-
'text': '''[OUTPUT]\n\n'''
118+
'text': f'''{cleaning_prompt}[OUTPUT]\n\n'''
111119
},
112120
]
113121
}

py-src/data_formulator/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ def clean_data_request():
295295

296296
agent = DataCleanAgent(client=client, model=model)
297297

298-
candidates = agent.run(content['content_type'], content["raw_data"])
298+
candidates = agent.run(content['content_type'], content["raw_data"], content["image_cleaning_instruction"])
299299

300300
candidates = [c for c in candidates if c['status'] == 'ok']
301301

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "data_formulator"
7-
version = "0.1.3b"
7+
version = "0.1.3c"
88

99
requires-python = ">=3.9"
1010
authors = [

src/data/utils.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ export const createTableFromFromObjectArray = (title: string, values: any[], der
6666
}
6767
return newName;
6868
}
69+
// clean up messy column names
70+
if (name && name.includes(".")) {
71+
return name.replace(".", "_");
72+
}
6973
return name;
7074
})
7175

src/views/TableSelectionView.tsx

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ export const TableCopyDialogV2: React.FC<TableCopyDialogProps> = ({ buttonElemen
417417
const [tableName, setTableName] = useState<string>("");
418418

419419
const [tableContent, setTableContent] = useState<string>("");
420+
const [imageCleaningInstr, setImageCleaningInstr] = useState<string>("");
420421
const [tableContentType, setTableContentType] = useState<'text' | 'image'>('text');
421422

422423
const [cleaningInProgress, setCleaningInProgress] = useState<boolean>(false);
@@ -476,6 +477,7 @@ export const TableCopyDialogV2: React.FC<TableCopyDialogProps> = ({ buttonElemen
476477
token: token,
477478
content_type: tableContentType,
478479
raw_data: tableContent,
480+
image_cleaning_instruction: imageCleaningInstr,
479481
model: activeModel
480482
}),
481483
};
@@ -652,27 +654,33 @@ export const TableCopyDialogV2: React.FC<TableCopyDialogProps> = ({ buttonElemen
652654
label="data content" variant="outlined" multiline minRows={15}
653655
/>
654656
:
655-
<Box sx={{marginTop: 1, position: 'relative'}}>
656-
{cleaningInProgress ? <LinearProgress sx={{ width: '100%', height: "calc(100% - 4px)", opacity: 0.1, position: 'absolute', zIndex: 1 }} /> : ""}
657-
<IconButton size="small" color="primary"
658-
sx={{ backgroundColor: 'white',
659-
width: 16, height: 16, boxShadow: 3,
660-
position: 'absolute', right: 4, top: 4,
661-
"&:hover": { backgroundColor: "white", boxShadow: 8, transform: "translate(0.5px, -0.5px)" }
662-
}}
663-
onClick={() => {
664-
setTableContent("");
665-
setTableContentType("text");
666-
}}
667-
>
668-
<CancelIcon sx={{fontSize: 16}} />
669-
</IconButton>
670-
{validator.isURL(tableContent) || validator.isDataURI(tableContent) ? (
671-
<img style={{border: '1px lightgray solid', borderRadius: 4, maxWidth: 640, maxHeight: 360}}
672-
src={DOMPurify.sanitize(tableContent)} alt="the image is corrupted, please try again." />
673-
) : (
674-
<Typography color="error">Invalid image data</Typography>
675-
)}
657+
<Box sx={{display: 'flex', flexDirection: 'column', alignItems: 'center'}}>
658+
<Box sx={{marginTop: 1, position: 'relative'}}>
659+
{cleaningInProgress ? <LinearProgress sx={{ width: '100%', height: "calc(100% - 4px)", opacity: 0.1, position: 'absolute', zIndex: 1 }} /> : ""}
660+
<IconButton size="small" color="primary"
661+
sx={{ backgroundColor: 'white',
662+
width: 16, height: 16, boxShadow: 3,
663+
position: 'absolute', right: 4, top: 4,
664+
"&:hover": { backgroundColor: "white", boxShadow: 8, transform: "translate(0.5px, -0.5px)" }
665+
}}
666+
onClick={() => {
667+
setTableContent("");
668+
setTableContentType("text");
669+
setImageCleaningInstr("");
670+
}}
671+
>
672+
<CancelIcon sx={{fontSize: 16}} />
673+
</IconButton>
674+
{validator.isURL(tableContent) || validator.isDataURI(tableContent) ? (
675+
<img style={{border: '1px lightgray solid', borderRadius: 4, maxWidth: 640, maxHeight: 360}}
676+
src={DOMPurify.sanitize(tableContent)} alt="the image is corrupted, please try again." />
677+
) : (
678+
<Typography color="error">Invalid image data</Typography>
679+
)}
680+
</Box>
681+
<TextField fullWidth size="small" sx={{ marginTop: 1, "& .MuiInputBase-input" : {fontSize: 14, lineHeight: 1.2 }}}
682+
value={imageCleaningInstr} onChange={(event) => { setImageCleaningInstr(event.target.value); }}
683+
variant="standard" placeholder='additional cleaning instructions' />
676684
</Box>)
677685
}
678686
</Box>
@@ -708,6 +716,7 @@ export const TableCopyDialogV2: React.FC<TableCopyDialogProps> = ({ buttonElemen
708716
{"upload"}
709717
</Button>
710718
</DialogActions>
719+
711720
</Dialog>;
712721

713722
return <>

yarn.lock

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,13 @@
968968
"@types/d3-transition" "*"
969969
"@types/d3-zoom" "*"
970970

971+
"@types/dompurify@^3.0.5":
972+
version "3.0.5"
973+
resolved "https://registry.npmjs.org/@types/dompurify/-/dompurify-3.0.5.tgz#02069a2fcb89a163bacf1a788f73cb415dd75cb7"
974+
integrity sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==
975+
dependencies:
976+
"@types/trusted-types" "*"
977+
971978
"@types/estree@1.0.5", "@types/estree@^1.0.0":
972979
version "1.0.5"
973980
resolved "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz#a6ce3e556e00fd9895dd872dd172ad0d4bd687f4"
@@ -1056,11 +1063,21 @@
10561063
resolved "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.3.tgz#cef09e3ec9af1d63d2a6cc5b383a737e24e6dcf5"
10571064
integrity sha512-5cJ8CB4yAx7BH1oMvdU0Jh9lrEXyPkar6F9G/ERswkCuvP4KQZfZkSjcMbAICCpQTN4OuZn8tz0HiKv9TGZgrQ==
10581065

1066+
"@types/trusted-types@*":
1067+
version "2.0.7"
1068+
resolved "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz#baccb07a970b91707df3a3e8ba6896c57ead2d11"
1069+
integrity sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==
1070+
10591071
"@types/use-sync-external-store@^0.0.3":
10601072
version "0.0.3"
10611073
resolved "https://registry.npmjs.org/@types/use-sync-external-store/-/use-sync-external-store-0.0.3.tgz"
10621074
integrity sha512-EwmlvuaxPNej9+T4v5AuBPJa2x2UOJVdjCtDHgcDqitUeOtjnJKJ+apYjVcAoBEMjKW1VVFGZLUb5+qqa09XFA==
10631075

1076+
"@types/validator@^13.12.2":
1077+
version "13.12.2"
1078+
resolved "https://registry.npmjs.org/@types/validator/-/validator-13.12.2.tgz#760329e756e18a4aab82fc502b51ebdfebbe49f5"
1079+
integrity sha512-6SlHBzUW8Jhf3liqrGGXyTJSIFe4nqlJ5A5KaMZ2l/vbM3Wh3KSybots/wfWVzNLK4D1NZluDlSQIbIEPx6oyA==
1080+
10641081
"@vitejs/plugin-react-swc@^3.7.0":
10651082
version "3.7.0"
10661083
resolved "https://registry.yarnpkg.com/@vitejs/plugin-react-swc/-/plugin-react-swc-3.7.0.tgz#e456c0a6d7f562268e1d231af9ac46b86ef47d88"

0 commit comments

Comments
 (0)