|
1 | 1 | package com.zhazhapan.vspider; |
2 | 2 |
|
3 | | -import java.util.ArrayList; |
4 | | -import java.util.Date; |
5 | | -import java.util.concurrent.TimeUnit; |
6 | | -import java.util.regex.Pattern; |
7 | | - |
8 | | -import org.apache.log4j.Logger; |
9 | | - |
10 | 3 | import com.zhazhapan.util.Formatter; |
11 | 4 | import com.zhazhapan.util.ThreadPool; |
12 | 5 | import com.zhazhapan.vspider.controller.MainController; |
13 | 6 | import com.zhazhapan.vspider.modules.constant.DefaultConfigValues; |
14 | 7 | import com.zhazhapan.vspider.modules.constant.Values; |
15 | | - |
16 | 8 | import javafx.application.Application; |
17 | 9 | import javafx.fxml.FXMLLoader; |
18 | 10 | import javafx.scene.Scene; |
19 | 11 | import javafx.scene.image.Image; |
20 | 12 | import javafx.scene.layout.BorderPane; |
21 | 13 | import javafx.stage.Stage; |
22 | 14 | import javafx.stage.WindowEvent; |
| 15 | +import org.apache.log4j.Logger; |
| 16 | + |
| 17 | +import java.util.ArrayList; |
| 18 | +import java.util.Date; |
| 19 | +import java.util.concurrent.TimeUnit; |
| 20 | +import java.util.regex.Pattern; |
23 | 21 |
|
24 | 22 | /** |
25 | 23 | * @author pantao |
26 | | - * |
27 | 24 | */ |
28 | 25 | public class App extends Application { |
29 | 26 |
|
30 | | - private static Logger logger = Logger.getLogger(App.class); |
31 | | - |
32 | | - /** |
33 | | - * 界面控制器 |
34 | | - */ |
35 | | - public static MainController mainController = null; |
36 | | - |
37 | | - /** |
38 | | - * 爬虫控制器 |
39 | | - */ |
40 | | - public static VsController controller = new VsController(); |
41 | | - |
42 | | - /** |
43 | | - * 待爬取的URLs |
44 | | - */ |
45 | | - public static String[] domains; |
46 | | - |
47 | | - /** |
48 | | - * 记录访问过的URLs |
49 | | - */ |
50 | | - public static ArrayList<String> visitUrls = new ArrayList<String>(); |
51 | | - |
52 | | - /** |
53 | | - * 记录下载过的URLs |
54 | | - */ |
55 | | - public static ArrayList<String> downloadUrls = new ArrayList<String>(); |
56 | | - |
57 | | - /** |
58 | | - * 爬取延迟 |
59 | | - */ |
60 | | - public static int crawlingDelay = DefaultConfigValues.POLITENESS_DELAY; |
61 | | - |
62 | | - /** |
63 | | - * 爬虫匹配(不匹配的链接将不会爬取,匹配的链接会进入访问状态) |
64 | | - */ |
65 | | - public static Pattern crawlFilterPattern = Pattern.compile(".*"); |
66 | | - |
67 | | - /** |
68 | | - * 访问匹配(不匹配的链接将不会访问,匹配的链接会将服务器返回的源代码传送到下载模式) |
69 | | - */ |
70 | | - public static Pattern visitFilterPattern = Pattern.compile(".*"); |
71 | | - |
72 | | - /** |
73 | | - * 下载匹配(从网页源代码获取可以下载的资源,资源链接不匹配的将不会下载) |
74 | | - */ |
75 | | - public static Pattern downloadFilterPattern = Pattern.compile(".*"); |
76 | | - |
77 | | - /** |
78 | | - * 下载的存储目录 |
79 | | - */ |
80 | | - public static String DOWNLOAD_FOLDER = DefaultConfigValues.CRAWL_STORAGE_FOLDER + Values.SEPARATOR + "files" |
81 | | - + Values.SEPARATOR + Formatter.datetimeToCustomString(new Date(), "yyyyMMdd"); |
82 | | - |
83 | | - /** |
84 | | - * 主程序入口 |
85 | | - * |
86 | | - * @param args |
87 | | - * {@link String} |
88 | | - */ |
89 | | - public static void main(String[] args) { |
90 | | - logger.info("start to run app"); |
91 | | - initThreadPool(); |
92 | | - // 启动JavaFX,会调用start方法 |
93 | | - launch(args); |
94 | | - } |
95 | | - |
96 | | - @Override |
97 | | - public void start(Stage stage) throws Exception { |
98 | | - try { |
99 | | - BorderPane root = (BorderPane) FXMLLoader.load(getClass().getResource("view/MainWindow.fxml")); |
100 | | - stage.setScene(new Scene(root)); |
101 | | - } catch (Exception e) { |
102 | | - logger.error("load fxml error: " + e.getMessage()); |
103 | | - } |
104 | | - stage.setTitle(Values.MAIN_TITLE); |
105 | | - stage.getIcons().add(new Image(getClass().getResourceAsStream("view/spider.jpg"))); |
106 | | - stage.show(); |
107 | | - stage.setOnCloseRequest((WindowEvent event) -> { |
108 | | - stage.setIconified(true); |
109 | | - event.consume(); |
110 | | - }); |
111 | | - } |
112 | | - |
113 | | - /** |
114 | | - * 初始化线程池 |
115 | | - */ |
116 | | - public static void initThreadPool() { |
117 | | - ThreadPool.setCorePoolSize(1); |
118 | | - ThreadPool.setMaximumPoolSize(5); |
119 | | - ThreadPool.setKeepAliveTime(100); |
120 | | - ThreadPool.setTimeUnit(TimeUnit.MILLISECONDS); |
121 | | - ThreadPool.init(); |
122 | | - } |
| 27 | + /** |
| 28 | + * UI controller; initialized to null here. |
| 29 | + */ |
| 30 | + public static MainController mainController = null; |
| 31 | + /** |
| 32 | + * Crawler controller. |
| 33 | + */ |
| 34 | + public static VsController controller = new VsController(); |
| 35 | + /** |
| 36 | + * URLs to be crawled. |
| 37 | + */ |
| 38 | + public static String[] domains; |
| 39 | + /** |
| 40 | + * Record of URLs that have been visited. |
| 41 | + */ |
| 42 | + public static ArrayList<String> visitUrls = new ArrayList<>(); |
| 43 | + /** |
| 44 | + * Record of URLs that have been downloaded. |
| 45 | + */ |
| 46 | + public static ArrayList<String> downloadUrls = new ArrayList<>(); |
| 47 | + /** |
| 48 | + * Crawl delay; defaults to DefaultConfigValues.POLITENESS_DELAY (unit not shown in this file). |
| 49 | + */ |
| 50 | + public static int crawlingDelay = DefaultConfigValues.POLITENESS_DELAY; |
| 51 | + /** |
| 52 | + * Crawl filter (links that do not match are not crawled; matching links move on to the visit stage). Defaults to ".*". |
| 53 | + */ |
| 54 | + public static Pattern crawlFilterPattern = Pattern.compile(".*"); |
| 55 | + /** |
| 56 | + * Visit filter (links that do not match are not visited; for matching links the source returned by the server is passed to the download stage). Defaults to ".*". |
| 57 | + */ |
| 58 | + public static Pattern visitFilterPattern = Pattern.compile(".*"); |
| 59 | + /** |
| 60 | + * Download filter (downloadable resources are taken from the page source; resources whose links do not match are not downloaded). Defaults to ".*". |
| 61 | + */ |
| 62 | + public static Pattern downloadFilterPattern = Pattern.compile(".*"); |
| 63 | + /** |
| 64 | + * Storage directory for downloads: the crawl storage folder, then "files", then a yyyyMMdd date folder built from the date at class initialization. |
| 65 | + */ |
| 66 | + public static String DOWNLOAD_FOLDER = DefaultConfigValues.CRAWL_STORAGE_FOLDER + Values.SEPARATOR + "files" + |
| 67 | + Values.SEPARATOR + Formatter.datetimeToCustomString(new Date(), "yyyyMMdd"); |
| 68 | + private static Logger logger = Logger.getLogger(App.class); |
| 69 | + |
| 70 | + /** |
| 71 | + * Program entry point: logs the startup, initializes the thread pool, then launches JavaFX. |
| 72 | + * |
| 73 | + * @param args command-line arguments, passed through unchanged to launch |
| 74 | + */ |
| 75 | + public static void main(String[] args) { |
| 76 | + logger.info("start to run app"); |
| 77 | + initThreadPool(); |
| 78 | + // Launch JavaFX, which will call the start method |
| 79 | + launch(args); |
| 80 | + } |
| 81 | + |
| 82 | + /** |
| 83 | + * Initializes the thread pool: core pool size 1, maximum pool size 5, keep-alive time 100 with unit MILLISECONDS, then calls ThreadPool.init(). |
| 84 | + */ |
| 85 | + public static void initThreadPool() { |
| 86 | + ThreadPool.setCorePoolSize(1); |
| 87 | + ThreadPool.setMaximumPoolSize(5); |
| 88 | + ThreadPool.setKeepAliveTime(100); |
| 89 | + ThreadPool.setTimeUnit(TimeUnit.MILLISECONDS); |
| 90 | + ThreadPool.init(); |
| 91 | + } |
| 92 | + |
| 93 | + @Override |
| 94 | + public void start(Stage stage) { /* Builds the main window: loads the FXML scene, sets title and icon, shows the stage, installs the close handler. */ |
| 95 | + try { |
| 96 | + BorderPane root = FXMLLoader.load(getClass().getResource("/view/MainWindow.fxml")); |
| 97 | + stage.setScene(new Scene(root)); |
| 98 | + } catch (Exception e) { /* A load failure is only logged; the code below still runs and shows the stage without a scene set here. */ |
| 99 | + logger.error("load fxml error: " + e.getMessage()); /* NOTE(review): only the message is logged, not the exception itself - confirm whether the stack trace is wanted. */ |
| 100 | + } |
| 101 | + stage.setTitle(Values.MAIN_TITLE); |
| 102 | + stage.getIcons().add(new Image(getClass().getResourceAsStream("/image/spider.jpg"))); |
| 103 | + stage.show(); |
| 104 | + stage.setOnCloseRequest((WindowEvent event) -> { /* A close request iconifies the window and is consumed rather than left to proceed. */ |
| 105 | + stage.setIconified(true); |
| 106 | + event.consume(); |
| 107 | + }); |
| 108 | + } |
123 | 109 | } |
0 commit comments