Skip to content

Commit ff2e8dd

Browse files
committed
[searchcursor addon] Support multi-line regexp matching
1 parent 3c8b5a7 commit ff2e8dd

4 files changed

Lines changed: 137 additions & 40 deletions

File tree

addon/search/search.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454

5555
function getSearchCursor(cm, query, pos) {
5656
// Heuristic: if the query string is all lowercase, do a case insensitive search.
57-
return cm.getSearchCursor(query, pos, queryCaseInsensitive(query));
57+
return cm.getSearchCursor(query, pos, {caseFold: queryCaseInsensitive(query), multiline: true});
5858
}
5959

6060
function persistentDialog(cm, text, deflt, onEnter, onKeyDown) {

addon/search/searchcursor.js

Lines changed: 94 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,19 @@
1212
"use strict"
1313
var Pos = CodeMirror.Pos
1414

15+
function regexpFlags(regexp) {
16+
var flags = regexp.flags
17+
return flags != null ? flags : (regexp.ignoreCase ? "i" : "")
18+
+ (regexp.global ? "g" : "")
19+
+ (regexp.multiline ? "m" : "")
20+
}
21+
1522
function ensureGlobal(regexp) {
16-
return regexp.global ? regexp : new RegExp(regexp.source, regexp.ignoreCase ? "ig" : "g")
23+
return regexp.global ? regexp : new RegExp(regexp.source, regexpFlags(regexp) + "g")
24+
}
25+
26+
function maybeMultiline(regexp) {
27+
return /\\s|\\n|\n|\\W|\\D|\[\^/.test(regexp.source)
1728
}
1829

1930
function searchRegexpForward(doc, regexp, start) {
@@ -28,27 +39,82 @@
2839
}
2940
}
3041

42+
function searchRegexpForwardMultiline(doc, regexp, start) {
43+
if (!maybeMultiline(regexp)) return searchRegexpForward(doc, regexp, start)
44+
45+
regexp = ensureGlobal(regexp)
46+
var string, chunk = 1
47+
for (var line = start.line, last = doc.lastLine(); line <= last;) {
48+
// This grows the search buffer in exponentially-sized chunks
49+
// between matches, so that nearby matches are fast and don't
50+
// require concatenating the whole document (in case we're
51+
// searching for something that has tons of matches), but at the
52+
// same time, the amount of retries is limited.
53+
for (var i = 0; i < chunk; i++) {
54+
var curLine = doc.getLine(line++)
55+
string = string == null ? curLine : string + "\n" + curLine
56+
}
57+
chunk = chunk * 2
58+
regexp.lastIndex = start.ch
59+
var match = regexp.exec(string)
60+
if (match && match[0].length) {
61+
var before = string.slice(0, match.index).split("\n"), inside = match[0].split("\n")
62+
var startLine = start.line + before.length - 1, startCh = before[before.length - 1].length
63+
return {from: Pos(startLine, startCh),
64+
to: Pos(startLine + inside.length - 1,
65+
inside.length == 1 ? startCh + inside[0].length : inside[inside.length - 1].length),
66+
match: match}
67+
}
68+
}
69+
}
70+
71+
function lastMatchIn(string, regexp) {
72+
var cutOff = 0, match
73+
for (;;) {
74+
regexp.lastIndex = cutOff
75+
var newMatch = regexp.exec(string)
76+
if (!newMatch) return match
77+
match = newMatch
78+
cutOff = match.index + (match[0].length || 1)
79+
if (cutOff == string.length) return match
80+
}
81+
}
82+
3183
function searchRegexpBackward(doc, regexp, start) {
3284
regexp = ensureGlobal(regexp)
3385
for (var line = start.line, ch = start.ch, first = doc.firstLine(); line >= first; line--, ch = -1) {
34-
var string = doc.getLine(line), cutOff = 0, match
86+
var string = doc.getLine(line)
3587
if (ch > -1) string = string.slice(0, ch)
36-
for (;;) {
37-
regexp.lastIndex = cutOff
38-
var newMatch = regexp.exec(string)
39-
if (!newMatch) break
40-
match = newMatch
41-
cutOff = match.index + (match[0].length || 1)
42-
if (cutOff == line.length) break
43-
}
44-
88+
var match = lastMatchIn(string, regexp)
4589
if (match && match[0].length)
4690
return {from: Pos(line, match.index),
4791
to: Pos(line, match.index + match[0].length),
4892
match: match}
4993
}
5094
}
5195

96+
function searchRegexpBackwardMultiline(doc, regexp, start) {
97+
regexp = ensureGlobal(regexp)
98+
var string, chunk = 1
99+
for (var line = start.line, first = doc.firstLine(); line >= first;) {
100+
for (var i = 0; i < chunk; i++) {
101+
var curLine = doc.getLine(line--)
102+
string = string == null ? curLine.slice(0, start.ch) : curLine + "\n" + string
103+
}
104+
chunk *= 2
105+
106+
var match = lastMatchIn(string, regexp)
107+
if (match && match[0].length) {
108+
var before = string.slice(0, match.index).split("\n"), inside = match[0].split("\n")
109+
var startLine = line + before.length, startCh = before[before.length - 1].length
110+
return {from: Pos(startLine, startCh),
111+
to: Pos(startLine + inside.length - 1,
112+
inside.length == 1 ? startCh + inside[0].length : inside[inside.length - 1].length),
113+
match: match}
114+
}
115+
}
116+
}
117+
52118
function doFold(str) { return str.toLowerCase() }
53119
function noFold(str) { return str }
54120

@@ -119,22 +185,35 @@
119185
}
120186
}
121187

122-
function SearchCursor(doc, query, pos, caseFold) {
188+
function SearchCursor(doc, query, pos, options) {
123189
this.atOccurrence = false
124190
this.doc = doc
125191
pos = pos ? doc.clipPos(pos) : Pos(0, 0)
126192
this.pos = {from: pos, to: pos}
127193

194+
var caseFold
195+
if (typeof options == "object") {
196+
caseFold = options.caseFold
197+
} else { // Backwards compat for when caseFold was the 4th argument
198+
caseFold = options
199+
options = null
200+
}
201+
128202
if (typeof query == "string") {
129203
if (caseFold == null) caseFold = false
130204
this.matches = function(reverse, pos) {
131205
return (reverse ? searchStringBackward : searchStringForward)(doc, query, pos, caseFold)
132206
}
133207
} else {
134208
query = ensureGlobal(query)
135-
this.matches = function(reverse, pos) {
136-
return (reverse ? searchRegexpBackward : searchRegexpForward)(doc, query, pos)
137-
}
209+
if (!options || options.multiline !== false)
210+
this.matches = function(reverse, pos) {
211+
return (reverse ? searchRegexpBackwardMultiline : searchRegexpForwardMultiline)(doc, query, pos)
212+
}
213+
else
214+
this.matches = function(reverse, pos) {
215+
return (reverse ? searchRegexpBackward : searchRegexpForward)(doc, query, pos)
216+
}
138217
}
139218
}
140219

doc/manual.html

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2263,16 +2263,18 @@ <h2 id="addons">Addons</h2>
22632263
<p>Depends on <code>addon/dialog/dialog.css</code>.</p></dd>
22642264

22652265
<dt id="addon_searchcursor"><a href="../addon/search/searchcursor.js"><code>search/searchcursor.js</code></a></dt>
2266-
<dd>Adds the <code>getSearchCursor(query, start, caseFold) →
2266+
<dd>Adds the <code>getSearchCursor(query, start, options) →
22672267
cursor</code> method to CodeMirror instances, which can be used
22682268
to implement search/replace functionality. <code>query</code>
2269-
can be a regular expression or a string (only strings will match
2270-
across lines—if they contain newlines). <code>start</code>
2269+
can be a regular expression or a string. <code>start</code>
22712270
provides the starting position of the search. It can be
22722271
a <code>{line, ch}</code> object, or can be left off to default
2273-
to the start of the document. <code>caseFold</code> is only
2274-
relevant when matching a string. It will cause the search to be
2275-
case-insensitive. A search cursor has the following methods:
2272+
to the start of the document. <code>options</code> is an
2273+
optional object, which can contain the property `caseFold:
2274+
false` to disable case folding when matching a string, or the
2275+
property `multiline: false` to disable multi-line matching for
2276+
regular expressions (which may help performance). A search
2277+
cursor has the following methods:
22762278
<dl>
22772279
<dt><code><strong>findNext</strong>() → boolean</code></dt>
22782280
<dt><code><strong>findPrevious</strong>() → boolean</code></dt>

test/search_test.js

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,8 @@
11
(function() {
22
"use strict";
33

4-
function test(name) {
5-
var text = Array.prototype.slice.call(arguments, 1, arguments.length - 1).join("\n");
6-
var body = arguments[arguments.length - 1];
7-
return window.test("search_" + name, function() {
8-
body(new CodeMirror.Doc(text));
9-
});
10-
}
11-
12-
function run(doc, query, insensitive) {
13-
var cursor = doc.getSearchCursor(query, null, insensitive);
4+
function run(doc, query, options) {
5+
var cursor = doc.getSearchCursor(query, null, options);
146
for (var i = 3; i < arguments.length; i += 4) {
157
var found = cursor.findNext();
168
is(found, "not enough results (forward)");
@@ -27,35 +19,59 @@
2719
is(!cursor.findPrevious(), "too many matches (backwards)");
2820
}
2921

30-
test("simple", "abcdefg", "abcdefg", function(doc) {
22+
function test(name, f) { window.test("search_" + name, f) }
23+
24+
test("simple", function() {
25+
var doc = new CodeMirror.Doc("abcdefg\nabcdefg")
3126
run(doc, "cde", false, 0, 2, 0, 5, 1, 2, 1, 5);
3227
});
3328

34-
test("multiline", "hallo", "a", "b", "goodbye", function(doc) {
29+
test("multiline", function() {
30+
var doc = new CodeMirror.Doc("hallo\na\nb\ngoodbye")
3531
run(doc, "llo\na\nb\ngoo", false, 0, 2, 3, 3);
3632
run(doc, "blah\na\nb\nhall", false);
3733
run(doc, "bye\nx\neye", false);
3834
});
3935

40-
test("regexp", "abcde", "abcde", function(doc) {
36+
test("regexp", function() {
37+
var doc = new CodeMirror.Doc("abcde\nabcde")
4138
run(doc, /bcd/, false, 0, 1, 0, 4, 1, 1, 1, 4);
4239
run(doc, /BCD/, false);
4340
run(doc, /BCD/i, false, 0, 1, 0, 4, 1, 1, 1, 4);
4441
});
4542

46-
test("insensitive", "hallo", "HALLO", "oink", "hAllO", function(doc) {
43+
test("regexpMultiline", function() {
44+
var doc = new CodeMirror.Doc("foo foo\nbar\nbaz")
45+
run(doc, /fo[^]*az/, {multiline: true}, 0, 0, 2, 3)
46+
run(doc, /[oa][^u]/, {multiline: true}, 0, 1, 0, 3, 0, 5, 0, 7, 1, 1, 1, 3, 2, 1, 2, 3)
47+
run(doc, /[a][^u]{2}/, {multiline: true}, 1, 1, 2, 0)
48+
})
49+
50+
test("insensitive", function() {
51+
var doc = new CodeMirror.Doc("hallo\nHALLO\noink\nhAllO")
4752
run(doc, "All", false, 3, 1, 3, 4);
4853
run(doc, "All", true, 0, 1, 0, 4, 1, 1, 1, 4, 3, 1, 3, 4);
4954
});
5055

51-
test("multilineInsensitive", "zie ginds komT", "De Stoomboot", "uit Spanje weer aan", function(doc) {
56+
test("multilineInsensitive", function() {
57+
var doc = new CodeMirror.Doc("zie ginds komT\nDe Stoomboot\nuit Spanje weer aan")
5258
run(doc, "komt\nde stoomboot\nuit", false);
53-
run(doc, "komt\nde stoomboot\nuit", true, 0, 10, 2, 3);
54-
run(doc, "kOMt\ndE stOOmboot\nuiT", true, 0, 10, 2, 3);
59+
run(doc, "komt\nde stoomboot\nuit", {caseFold: true}, 0, 10, 2, 3);
60+
run(doc, "kOMt\ndE stOOmboot\nuiT", {caseFold: true}, 0, 10, 2, 3);
5561
});
5662

57-
test("expandingCaseFold", "<b>İİ İİ</b>", "<b>uu uu</b>", function(doc) {
63+
test("multilineInsensitiveSlow", function() {
64+
var text = ""
65+
for (var i = 0; i < 1000; i++) text += "foo\nbar\n"
66+
var doc = new CodeMirror.Doc("find\nme\n" + text + "find\nme\n")
67+
var t0 = +new Date
68+
run(doc, /find\nme/, {multiline: true}, 0, 0, 1, 2, 2002, 0, 2003, 2)
69+
is(+new Date - t0 < 100)
70+
})
71+
72+
test("expandingCaseFold", function() {
5873
if (phantom) return; // A Phantom bug makes this hang
74+
var doc = new CodeMirror.Doc("<b>İİ İİ</b>\n<b>uu uu</b>")
5975
run(doc, "</b>", true, 0, 8, 0, 12, 1, 8, 1, 12);
6076
run(doc, "İİ", true, 0, 3, 0, 5, 0, 6, 0, 8);
6177
});

0 commit comments

Comments
 (0)