Skip to content

Commit 6d8a930

Browse files
authored
Merge pull request ninenine#29 from pitaj/master
Unicode support, settings updated correctly
2 parents 380b1ae + e8945bb commit 6d8a930

5 files changed

Lines changed: 68 additions & 37 deletions

File tree

index.js

Lines changed: 11 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ var Beep = {
2424

2525
parseContent: function (content) {
2626
var nil = '^(?!x)x';
27-
return parseContent(content, Beep.banned_words || nil, Beep.banned_urls || nil, Beep.censorWholeWord || nil);
27+
return parseContent(content, Beep.banned_words || nil, Beep.banned_urls || nil, Beep.censorWholeWord);
2828
},
2929
toRegExp: toRegExp,
3030
loadList: function (callback) {
@@ -34,21 +34,27 @@ var Beep = {
3434
return callback(err);
3535
}
3636

37-
Beep.illegal_words = new RegExp('\\b(?:' + Beep.toRegExp(hash.illegal) + ')\\b', 'ig');
37+
Beep.illegal_words = Beep.toRegExp(hash.illegal, true);
3838

3939
if (hash.id && hash.id.length) {
4040
var words = hash.id.split(',').filter(function (word) {
4141
return !Beep.illegal_words.test(word);
4242
});
43-
Beep.banned_words = new RegExp('\\b(?:' + Beep.toRegExp(words) + ')\\b', 'ig');
43+
Beep.banned_words = Beep.toRegExp(words, true);
4444
Beep.banned_words_raw = hash.id;
4545
} else {
46-
Beep.banned_words = new RegExp('\\b(?:' + Beep.toRegExp(defaultBanList) + ')\\b', 'ig');
46+
Beep.banned_words = Beep.toRegExp(defaultBanList, true);
4747
Beep.banned_words_raw = defaultBanList.join(',');
4848
winston.info('Default list of Banned Words is enabled. Please go to administration panel to change the list.');
4949
}
50-
Beep.banned_urls = new RegExp(Beep.toRegExp(hash.urls), 'ig');
50+
51+
Beep.banned_urls = Beep.toRegExp(hash.urls);
52+
5153
Beep.censorWholeWord = hash.censorWholeWord === 'on';
54+
if (meta.config) {
55+
meta.config.beep = meta.config.beep || {};
56+
meta.config.beep.censorWholeWord = Beep.censorWholeWord;
57+
}
5258

5359
callback();
5460
});
@@ -141,26 +147,6 @@ var Beep = {
141147
callback(null, custom_header);
142148
}
143149
},
144-
category: {
145-
get: function (data, callback) {
146-
var topics = data.category.topics;
147-
topics.forEach(function (topic) {
148-
topic.title = Beep.parseContent(topic.title);
149-
topic.slug = Beep.parseContent(topic.slug);
150-
});
151-
callback(null, data);
152-
},
153-
topics: {
154-
get: function (data, callback) {
155-
data.topics.forEach(function (topic) {
156-
topic.title = Beep.parseContent(topic.title);
157-
topic.slug = Beep.parseContent(topic.slug);
158-
topic.titleRaw = Beep.parseContent(topic.titleRaw);
159-
});
160-
callback(null, data);
161-
}
162-
}
163-
},
164150
post: {
165151
getFields: function (data, callback) {
166152
if (data.fields.indexOf('content') !== -1) {

lib/parseContent.js

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
'use strict';
22

3+
var isLatin = /^\w+$/;
4+
35
function parseContent(content, banned_words, banned_urls, censorWholeWord) {
46
if (!content) {
57
return content;
68
}
79

810
function censor(match) {
11+
if (!isLatin.test(match)) {
12+
return '[censored]';
13+
}
14+
915
var l = match.length;
1016
var out = match[0];
1117

@@ -24,4 +30,4 @@ function parseContent(content, banned_words, banned_urls, censorWholeWord) {
2430
.replace(banned_urls, '[link removed]');
2531
}
2632

27-
module.exports = parseContent;
33+
module.exports = parseContent;

lib/toRegExp.js

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,41 @@
11
'use strict';
22

3-
function toRegExp(arr) {
3+
var isLatin = /^\w+$/;
4+
5+
function toRegExp(arr, fullWord) {
46
if (!Array.isArray(arr)) {
57
arr = (arr || '').toString().split(',');
68
}
9+
arr = arr.filter(Boolean);
10+
11+
var str;
12+
if (fullWord) {
13+
var latin = arr.filter(function (word) {
14+
return isLatin.test(word);
15+
}).map(function (word) {
16+
return word.trim().replace(/([-[\]{}()*+?.,\\^$|#\s])/g, '\\$1');
17+
}).join('|');
718

8-
var str = arr.filter(Boolean).map(function (word) {
9-
return word.trim().replace(/([-[\]{}()*+?.,\\^$|#\s])/g, '\\$1');
10-
}).join('|');
19+
var notLatin = arr.filter(function (word) {
20+
return !isLatin.test(word);
21+
}).map(function (word) {
22+
return word.trim().replace(/([-[\]{}()*+?.,\\^$|#\s])/g, '\\$1');
23+
}).join('|');
24+
25+
if (latin && notLatin) {
26+
str = '\\b(?:' + latin + ')\\b|(?:' + notLatin + ')';
27+
} else if (latin) {
28+
str = '\\b(?:' + latin + ')\\b';
29+
} else if (notLatin) {
30+
str = notLatin;
31+
}
32+
} else {
33+
str = arr.filter(Boolean).map(function (word) {
34+
return word.trim().replace(/([-[\]{}()*+?.,\\^$|#\s])/g, '\\$1');
35+
}).join('|');
36+
}
1137

12-
return str || '^(?!x)x';
38+
return new RegExp(str || '^(?!x)x', 'ig');
1339
}
1440

15-
module.exports = toRegExp;
41+
module.exports = toRegExp;

tests/parseContent.spec.js

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ var assert = require('assert');
55
var toRegExp = require('../lib/toRegExp');
66
var parseContent = require('../lib/parseContent');
77

8-
var bannedWords = new RegExp('\\b(?:' + toRegExp(['poop', 'shit']) + ')\\b', 'ig');
9-
var bannedUrls = new RegExp(toRegExp(['http://example.com', 'http://foo.bar']), 'ig');
8+
var bannedWords = toRegExp(['poop', 'shit'], true);
9+
var bannedUrls = toRegExp(['http://example.com', 'http://foo.bar']);
1010
var nil = '^(?!x)x';
1111

1212
assert.strictEqual(parseContent(
@@ -29,3 +29,13 @@ assert.strictEqual(parseContent(
2929
bannedUrls,
3030
false
3131
), 'My favorite website is [link removed]. I also love [link removed].');
32+
33+
var unicodeBannedWords = toRegExp(['今', '野'], true);
34+
assert.strictEqual(parseContent(
35+
'載点代示早面通今就焼初哲野質',
36+
unicodeBannedWords,
37+
nil,
38+
false
39+
), '載点代示早面通[censored]就焼初哲[censored]質');
40+
41+
console.log('parseContent passed');

tests/toRegExp.spec.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ var assert = require('assert');
44

55
var toRegExp = require('../lib/toRegExp');
66

7-
assert.deepStrictEqual(toRegExp(['bad', 'words', 'here']), 'bad|words|here');
8-
assert.deepStrictEqual(toRegExp('bad,words, here'), 'bad|words|here');
9-
assert.deepStrictEqual(toRegExp(null), '^(?!x)x');
7+
assert.equal(toRegExp(['bad', 'words', 'here'], true), '/\\b(?:bad|words|here)\\b/gi');
8+
assert.equal(toRegExp('bad,words, here'), '/bad|words|here/gi');
9+
assert.equal(toRegExp('今,野', true), '/今|野/gi');
10+
assert.equal(toRegExp(null), '/^(?!x)x/gi');
11+
12+
console.log('toRegExp passed');

0 commit comments

Comments
 (0)