Skip to content

Commit 425682f

Browse files
authored
Add OUTPUT as an alias for REPLACE in Lookup (#5049)
1 parent ff82c67 commit 425682f

5 files changed

Lines changed: 195 additions & 4 deletions

File tree

docs/user/ppl/cmd/lookup.md

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ The `lookup` command enriches search data by adding or replacing values from a l
88
The `lookup` command has the following syntax:
99

1010
```syntax
11-
lookup <lookupIndex> (<lookupMappingField> [as <sourceMappingField>])... [(replace | append) (<inputField> [as <outputField>])...]
11+
lookup <lookupIndex> (<lookupMappingField> [as <sourceMappingField>])... [(replace | append | output) (<inputField> [as <outputField>])...]
1212
```
1313

1414
The following are examples of the `lookup` command syntax:
@@ -21,6 +21,8 @@ source = table1 | lookup table2 id as cid, name replace dept as department
2121
source = table1 | lookup table2 id as cid, name replace dept as department, city as location
2222
source = table1 | lookup table2 id as cid, name append dept as department
2323
source = table1 | lookup table2 id as cid, name append dept as department, city as location
24+
source = table1 | lookup table2 id as cid, name output dept as department
25+
source = table1 | lookup table2 id as cid, name output dept as department, city as location
2426
```
2527

2628
## Parameters
@@ -34,7 +36,7 @@ The `lookup` command supports the following parameters.
3436
| `<sourceMappingField>` | Optional | A key from the source data (left side) used for matching, similar to a join key in the left table. Default is `lookupMappingField`. |
3537
| `<inputField>` | Optional | A field in the lookup index whose matched values are applied to the results (output). Specify multiple fields as a comma-separated list. If not specified, all fields except `lookupMappingField` from the lookup index are applied to the results. |
3638
| `<outputField>` | Optional | The name of the field in the results (output) in which matched values are placed. Specify multiple fields as a comma-separated list. If the `outputField` specifies an existing field in the source query, its values are replaced or appended with matched values from the `inputField`. If the field specified in the `outputField` is not an existing field, a new field is added to the results when using `replace` (or its synonym `output`), or the operation fails when using `append`. |
37-
| `(replace | append)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. Default is `replace`. |
39+
| `(replace \| append \| output)` | Optional | Specifies how matched values are applied to the output. `replace` overwrites existing values with matched values from the lookup index. `append` fills only missing values in the results with matched values from the lookup index. `output` is a synonym for `replace` (provided for SPL compatibility). Default is `replace`. |
3840

3941
## Example 1: Replace existing values
4042

@@ -121,4 +123,29 @@ The query returns the following results:
121123
| 1005 | Jane | Scientist | Canada | 90000 | Engineer |
122124
| 1002 | John | Doctor | Canada | 120000 | Scientist |
123125
+------+-------+------------+---------+--------+-----------+
124-
```
126+
```
127+
128+
## Example 5: Using the `OUTPUT` keyword
129+
130+
The `OUTPUT` keyword is a synonym for `REPLACE`. The following query demonstrates using `OUTPUT` to overwrite existing values:
131+
132+
```ppl ignore
133+
source = worker
134+
| LOOKUP work_information uid AS id OUTPUT department
135+
| fields id, name, occupation, country, salary, department
136+
```
137+
138+
This query produces the same results as Example 1 (using `REPLACE`):
139+
140+
```text
141+
+------+-------+------------+---------+--------+------------+
142+
| id | name | occupation | country | salary | department |
143+
|------+-------+------------+---------+--------+------------|
144+
| 1000 | Jake | Engineer | England | 100000 | IT |
145+
| 1001 | Hello | Artist | USA | 70000 | null |
146+
| 1002 | John | Doctor | Canada | 120000 | DATA |
147+
| 1003 | David | Doctor | null | 120000 | HR |
148+
| 1004 | David | null | Canada | 0 | null |
149+
| 1005 | Jane | Scientist | Canada | 90000 | DATA |
150+
+------+-------+------------+---------+--------+------------+
151+
```

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLLookupIT.java

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,4 +438,59 @@ public void testRnameAsIdShouldnWork() throws IOException {
438438
TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION));
439439
verifyNumOfRows(result, 6);
440440
}
441+
442+
@Test
443+
public void testUidAsIdOutputDepartment() throws IOException {
444+
// OUTPUT is a synonym for REPLACE (SPL compatibility)
445+
JSONObject result =
446+
executeQuery(
447+
String.format(
448+
"source = %s"
449+
+ "| LOOKUP %s uid AS id OUTPUT department"
450+
+ "| fields id, name, occupation, country, salary, department",
451+
TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION));
452+
verifySchema(
453+
result,
454+
schema("id", "int"),
455+
schema("name", "string"),
456+
schema("occupation", "string"),
457+
schema("country", "string"),
458+
schema("salary", "int"),
459+
schema("department", "string"));
460+
verifyDataRows(
461+
result,
462+
rows(1000, "Jake", "Engineer", "England", 100000, "IT"),
463+
rows(1001, "Hello", "Artist", "USA", 70000, null),
464+
rows(1002, "John", "Doctor", "Canada", 120000, "DATA"),
465+
rows(1003, "David", "Doctor", null, 120000, "HR"),
466+
rows(1004, "David", null, "Canada", 0, null),
467+
rows(1005, "Jane", "Scientist", "Canada", 90000, "DATA"));
468+
}
469+
470+
@Test
471+
public void testUidAsIdOutputDepartmentAsCountry() throws IOException {
472+
// OUTPUT with field aliasing (SPL compatibility)
473+
JSONObject result =
474+
executeQuery(
475+
String.format(
476+
"source = %s"
477+
+ "| LOOKUP %s uid AS id OUTPUT department AS country"
478+
+ "| fields id, name, occupation, salary, country",
479+
TEST_INDEX_WORKER, TEST_INDEX_WORK_INFORMATION));
480+
verifySchema(
481+
result,
482+
schema("id", "int"),
483+
schema("name", "string"),
484+
schema("occupation", "string"),
485+
schema("salary", "int"),
486+
schema("country", "string"));
487+
verifyDataRows(
488+
result,
489+
rows(1000, "Jake", "Engineer", 100000, "IT"),
490+
rows(1001, "Hello", "Artist", 70000, null),
491+
rows(1002, "John", "Doctor", 120000, "DATA"),
492+
rows(1003, "David", "Doctor", 120000, "HR"),
493+
rows(1004, "David", null, 0, null),
494+
rows(1005, "Jane", "Scientist", 90000, "DATA"));
495+
}
441496
}

ppl/src/main/antlr/OpenSearchPPLParser.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ patternMode
476476

477477
// lookup
478478
lookupCommand
479-
: LOOKUP tableSource lookupMappingList ((APPEND | REPLACE) outputCandidateList)?
479+
: LOOKUP tableSource lookupMappingList ((APPEND | REPLACE | OUTPUT) outputCandidateList)?
480480
;
481481

482482
lookupMappingList

ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,7 @@ public UnresolvedPlan visitPatternsCommand(OpenSearchPPLParser.PatternsCommandCo
972972
@Override
973973
public UnresolvedPlan visitLookupCommand(OpenSearchPPLParser.LookupCommandContext ctx) {
974974
Relation lookupRelation = new Relation(this.internalVisitExpression(ctx.tableSource()));
975+
// OUTPUT and REPLACE are synonyms - both overwrite existing fields, so only APPEND is checked and everything else resolves to REPLACE
975976
Lookup.OutputStrategy strategy =
976977
ctx.APPEND() != null ? Lookup.OutputStrategy.APPEND : Lookup.OutputStrategy.REPLACE;
977978
java.util.Map<String, String> mappingAliasMap =

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLLookupTest.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,114 @@ public void testReplaceAs() {
122122
verifyPPLToSparkSQL(root, expectedSparkSql);
123123
}
124124

125+
@Test
126+
public void testOutput() {
127+
// OUTPUT is a synonym for REPLACE (for SPL compatibility)
128+
String ppl = "source=EMP | lookup DEPT DEPTNO output LOC";
129+
RelNode root = getRelNode(ppl);
130+
String expectedLogical =
131+
"LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
132+
+ " COMM=[$6], DEPTNO=[$7], LOC=[$8])\n"
133+
+ " LogicalJoin(condition=[=($7, $9)], joinType=[left])\n"
134+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
135+
+ " LogicalProject(LOC=[$2], DEPTNO=[$0])\n"
136+
+ " LogicalTableScan(table=[[scott, DEPT]])\n";
137+
verifyLogical(root, expectedLogical);
138+
139+
String expectedResult =
140+
"EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00;"
141+
+ " COMM=null; DEPTNO=10; LOC=NEW YORK\n"
142+
+ "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;"
143+
+ " COMM=null; DEPTNO=10; LOC=NEW YORK\n"
144+
+ "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;"
145+
+ " COMM=null; DEPTNO=10; LOC=NEW YORK\n"
146+
+ "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00;"
147+
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
148+
+ "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;"
149+
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
150+
+ "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;"
151+
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
152+
+ "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;"
153+
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
154+
+ "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;"
155+
+ " COMM=null; DEPTNO=20; LOC=DALLAS\n"
156+
+ "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;"
157+
+ " COMM=300.00; DEPTNO=30; LOC=CHICAGO\n"
158+
+ "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;"
159+
+ " COMM=500.00; DEPTNO=30; LOC=CHICAGO\n"
160+
+ "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;"
161+
+ " COMM=1400.00; DEPTNO=30; LOC=CHICAGO\n"
162+
+ "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;"
163+
+ " COMM=null; DEPTNO=30; LOC=CHICAGO\n"
164+
+ "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;"
165+
+ " COMM=0.00; DEPTNO=30; LOC=CHICAGO\n"
166+
+ "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;"
167+
+ " COMM=null; DEPTNO=30; LOC=CHICAGO\n";
168+
verifyResult(root, expectedResult);
169+
170+
String expectedSparkSql =
171+
"SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`,"
172+
+ " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `t`.`LOC`\n"
173+
+ "FROM `scott`.`EMP`\n"
174+
+ "LEFT JOIN (SELECT `LOC`, `DEPTNO`\n"
175+
+ "FROM `scott`.`DEPT`) `t` ON `EMP`.`DEPTNO` = `t`.`DEPTNO`";
176+
verifyPPLToSparkSQL(root, expectedSparkSql);
177+
}
178+
179+
@Test
180+
public void testOutputAs() {
181+
// OUTPUT with field aliasing (SPL compatibility)
182+
String ppl = "source=EMP | lookup DEPT DEPTNO output LOC as JOB";
183+
RelNode root = getRelNode(ppl);
184+
String expectedLogical =
185+
"LogicalProject(EMPNO=[$0], ENAME=[$1], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6],"
186+
+ " DEPTNO=[$7], JOB=[$8])\n"
187+
+ " LogicalJoin(condition=[=($7, $9)], joinType=[left])\n"
188+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
189+
+ " LogicalProject(LOC=[$2], DEPTNO=[$0])\n"
190+
+ " LogicalTableScan(table=[[scott, DEPT]])\n";
191+
verifyLogical(root, expectedLogical);
192+
193+
String expectedResult =
194+
"EMPNO=7782; ENAME=CLARK; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00; COMM=null; DEPTNO=10;"
195+
+ " JOB=NEW YORK\n"
196+
+ "EMPNO=7839; ENAME=KING; MGR=null; HIREDATE=1981-11-17; SAL=5000.00; COMM=null;"
197+
+ " DEPTNO=10; JOB=NEW YORK\n"
198+
+ "EMPNO=7934; ENAME=MILLER; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00; COMM=null;"
199+
+ " DEPTNO=10; JOB=NEW YORK\n"
200+
+ "EMPNO=7369; ENAME=SMITH; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;"
201+
+ " DEPTNO=20; JOB=DALLAS\n"
202+
+ "EMPNO=7566; ENAME=JONES; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00; COMM=null;"
203+
+ " DEPTNO=20; JOB=DALLAS\n"
204+
+ "EMPNO=7788; ENAME=SCOTT; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00; COMM=null;"
205+
+ " DEPTNO=20; JOB=DALLAS\n"
206+
+ "EMPNO=7876; ENAME=ADAMS; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00; COMM=null;"
207+
+ " DEPTNO=20; JOB=DALLAS\n"
208+
+ "EMPNO=7902; ENAME=FORD; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00; COMM=null;"
209+
+ " DEPTNO=20; JOB=DALLAS\n"
210+
+ "EMPNO=7499; ENAME=ALLEN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00; COMM=300.00;"
211+
+ " DEPTNO=30; JOB=CHICAGO\n"
212+
+ "EMPNO=7521; ENAME=WARD; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00; COMM=500.00;"
213+
+ " DEPTNO=30; JOB=CHICAGO\n"
214+
+ "EMPNO=7654; ENAME=MARTIN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00; COMM=1400.00;"
215+
+ " DEPTNO=30; JOB=CHICAGO\n"
216+
+ "EMPNO=7698; ENAME=BLAKE; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00; COMM=null;"
217+
+ " DEPTNO=30; JOB=CHICAGO\n"
218+
+ "EMPNO=7844; ENAME=TURNER; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00; COMM=0.00;"
219+
+ " DEPTNO=30; JOB=CHICAGO\n"
220+
+ "EMPNO=7900; ENAME=JAMES; MGR=7698; HIREDATE=1981-12-03; SAL=950.00; COMM=null;"
221+
+ " DEPTNO=30; JOB=CHICAGO\n";
222+
verifyResult(root, expectedResult);
223+
224+
String expectedSparkSql =
225+
"SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`MGR`, `EMP`.`HIREDATE`, `EMP`.`SAL`,"
226+
+ " `EMP`.`COMM`, `EMP`.`DEPTNO`, `t`.`LOC` `JOB`\n"
227+
+ "FROM `scott`.`EMP`\n"
228+
+ "LEFT JOIN (SELECT `LOC`, `DEPTNO`\n"
229+
+ "FROM `scott`.`DEPT`) `t` ON `EMP`.`DEPTNO` = `t`.`DEPTNO`";
230+
verifyPPLToSparkSQL(root, expectedSparkSql);
231+
}
232+
125233
@Ignore
126234
public void testMultipleLookupKeysReplace() {
127235
String ppl =

0 commit comments

Comments
 (0)