Skip to content

Commit 4131977

Browse files
committed
Improve the strategy for fetching content from the internet.
1 parent 62b8019 commit 4131977

1 file changed

Lines changed: 6 additions & 12 deletions

File tree

src/main/java/custom/ai/SearchAI.java

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,13 @@
1515
import java.net.MalformedURLException;
1616
import java.net.URISyntaxException;
1717
import java.net.URL;
18-
import java.net.URLEncoder;
1918
import java.util.ArrayList;
2019
import java.util.List;
2120
import java.util.regex.Matcher;
2221
import java.util.regex.Pattern;
2322

2423
public class SearchAI extends AbstractApplication implements Provider {
2524
private static final String SEARCH_URL = "https://lite.duckduckgo.com/lite/";
26-
// private static final String REGEX_PATTERN = "(?i)\\\\b((?:https?://|www\\\\d{0,3}[.]|[a-z0-9.\\\\-]+[.][a-z]{2,4}/)(?:[^\\\\s()<>]+|\\\\(([^\\\\s()<>]+|(\\\\([^\\\\s()<>]+\\\\)))*\\\\))*(?:\\\\(([^\\\\s()<>]+|(\\\\([^\\\\s()<>]+\\\\)))*\\\\)|[^\\\\s`!()\\\\[\\\\]{};:'\\\".,<>?«»“”‘’]))";
27-
// private static final String REGEX_PATTERN = "(?i)\\\\b((?:https?://|www\\\\d{0,3}[.]|[a-z0-9.\\\\-]+[.][a-z]{2,4}/)(?:[^\\\\s()<>]+|\\\\(([^\\\\s()<>]+|(\\\\([^\\\\s()<>]+\\\\)))*\\\\))*(?:\\\\(([^\\\\s()<>]+|(\\\\([^\\\\s()<>]+\\\\)))*\\\\)|[^\\\\s`!()\\\\[\\\\]{};:'\\\\\".,<>?«»“”‘’]))";
2825
private static final String REGEX_PATTERN = "(?i)\\b((?:https?:\\/\\/|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))*(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s\\W`!()\\[\\]{};:'\\\".,<>?«»“”‘’]))";
2926

3027
@Override
@@ -54,12 +51,9 @@ public Builder call() throws ApplicationException {
5451
headers.add(Header.CONNECTION.set("keep-alive"));
5552

5653
builder.setHeaders(headers).setMethod(Method.POST);
57-
try {
58-
builder.setParameter("q", URLEncoder.encode(query, "UTF-8"));
59-
} catch (UnsupportedEncodingException e) {
60-
e.printStackTrace();
61-
}
54+
builder.setParameter("q", query);
6255
builder.setParameter("kl", "us-en");
56+
builder.setParameter("df", "m");
6357
}
6458

6559
try {
@@ -73,10 +67,10 @@ public Builder call() throws ApplicationException {
7367
ArrayList<String> list = new ArrayList<String>();
7468

7569
HTMLEditorKit.ParserCallback cb = new HTMLEditorKit.ParserCallback() {
70+
final StringBuffer buffer = new StringBuffer();
7671
boolean ready = false;
7772
boolean withDuckDuckGo = false;
7873
int i = 0;
79-
final StringBuffer buffer = new StringBuffer();
8074

8175
@Override
8276
public void handleComment(char[] data, int pos) {
@@ -92,7 +86,7 @@ public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
9286
} else {
9387
if (t == HTML.Tag.HTML || t == HTML.Tag.TITLE || t == HTML.Tag.HEAD || t == HTML.Tag.META || t == HTML.Tag.BASE || t == HTML.Tag.LINK || t == HTML.Tag.SCRIPT || t == HTML.Tag.STYLE || t == HTML.Tag.MAP || t == HTML.Tag.FRAMESET) {
9488
ready = false;
95-
} else if (t == HTML.Tag.BODY || t == HTML.Tag.DIV || t == HTML.Tag.SPAN || t == HTML.Tag.P || t == HTML.Tag.A || t == HTML.Tag.B || t == HTML.Tag.I || t == HTML.Tag.STRONG || t == HTML.Tag.TD || t == HTML.Tag.LI) {
89+
} else if (t == HTML.Tag.BODY || t == HTML.Tag.H1 || t == HTML.Tag.H2 || t == HTML.Tag.H3 || t == HTML.Tag.H4 || t == HTML.Tag.H5 || t == HTML.Tag.H6 || t == HTML.Tag.DIV || t == HTML.Tag.SPAN || t == HTML.Tag.P || t == HTML.Tag.A || t == HTML.Tag.B || t == HTML.Tag.I || t == HTML.Tag.STRONG || t == HTML.Tag.TD || t == HTML.Tag.LI) {
9690
ready = true;
9791
}
9892
}
@@ -123,11 +117,11 @@ public void handleEndTag(HTML.Tag t, int pos) {
123117
public void handleText(char[] data, int pos) {
124118
if (withDuckDuckGo && ready && list.size() < 3) {
125119
buffer.append(data);
120+
buffer.append(" \n");
126121
} else if (ready) {
127122
buffer.append(data);
123+
buffer.append(" \n");
128124
}
129-
130-
buffer.append(" \n");
131125
}
132126
};
133127

0 commit comments

Comments
 (0)