-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathPuppeteerCookieRetriever.cs
More file actions
301 lines (268 loc) · 12.9 KB
/
PuppeteerCookieRetriever.cs
File metadata and controls
301 lines (268 loc) · 12.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Threading.Tasks;
using NLog;
using UniversalDownloaderPlatform.Common.Interfaces;
using PuppeteerSharp;
using PuppeteerSharp.Input;
using UniversalDownloaderPlatform.PuppeteerEngine.Interfaces;
using UniversalDownloaderPlatform.PuppeteerEngine.Interfaces.Wrappers.Browser;
using UniversalDownloaderPlatform.Common.Interfaces.Models;
namespace UniversalDownloaderPlatform.PuppeteerEngine
{
/// <summary>
/// Somewhat universal cookie retriever based on chromium browser
/// </summary>
public class PuppeteerCookieRetriever : ICookieRetriever, IDisposable
{
// NLog logger scoped to this class.
private readonly Logger _logger = LogManager.GetCurrentClassLogger();
// Browser engine wrapper; created in BeforeStart and replaced whenever RestartBrowser runs.
private IPuppeteerEngine _puppeteerEngine;
// Settings captured in BeforeStart (login/check/auth addresses, credentials, proxy, remote browser address).
private IPuppeteerSettings _settings;
// Headless mode chosen in BeforeStart; Login restarts the browser back into this mode once logged in.
private bool _isHeadlessBrowser;
// True when BeforeStart was given a non-null RemoteBrowserAddress.
private bool _isRemoteBrowser;
// True when both LoginEmail and LoginPassword are non-blank, which enables the automatic login flow.
private bool _shouldTryAutoLogin;
/// <summary>
/// Create new instance of PuppeteerCookieRetriever.
/// The constructor takes no configuration; settings are supplied later through BeforeStart,
/// which is also where the browser engine is created.
/// </summary>
public PuppeteerCookieRetriever()
{
}
/// <summary>
/// Captures the supplied settings and creates the browser engine: a remote engine when
/// <c>RemoteBrowserAddress</c> is set, otherwise a local, visible (non-headless) one.
/// </summary>
/// <param name="settings">Platform settings; the object must also implement <see cref="IPuppeteerSettings"/>.</param>
/// <returns>An already completed task; no asynchronous work is performed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="settings"/> does not implement <see cref="IPuppeteerSettings"/>.</exception>
public Task BeforeStart(IUniversalDownloaderPlatformSettings settings)
{
    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    // Fail with a descriptive error instead of a NullReferenceException on the first property access.
    IPuppeteerSettings puppeteerSettings = settings as IPuppeteerSettings;
    if (puppeteerSettings == null)
    {
        throw new ArgumentException($"Settings must implement {nameof(IPuppeteerSettings)}", nameof(settings));
    }

    _settings = puppeteerSettings;
    _shouldTryAutoLogin = !string.IsNullOrWhiteSpace(_settings.LoginEmail) && !string.IsNullOrWhiteSpace(_settings.LoginPassword);

    if (_settings.RemoteBrowserAddress != null)
    {
        _puppeteerEngine = new PuppeteerEngine(_settings.RemoteBrowserAddress) { UserAgent = settings.UserAgent };
        _isHeadlessBrowser = true;
        _isRemoteBrowser = true;
    }
    else
    {
        _isHeadlessBrowser = false;
        _isRemoteBrowser = false;
        _puppeteerEngine = new PuppeteerEngine(_isHeadlessBrowser, _settings.ProxyServerAddress) { UserAgent = settings.UserAgent };
    }

    return Task.CompletedTask;
}
/// <summary>
/// Closes the current browser and replaces the engine with a new local one running in the requested mode.
/// </summary>
/// <param name="headless">True to start the new browser headless, false to make it visible.</param>
/// <returns>The browser instance provided by the newly created engine.</returns>
private async Task<IWebBrowser> RestartBrowser(bool headless)
{
    await _puppeteerEngine.CloseBrowser();
    await Task.Delay(1000); //safety first

    // BeforeStart configures the engine with the user agent from the settings; carry it over so
    // the restarted browser does not silently switch to a different user agent.
    // _settings is the same object BeforeStart received as IUniversalDownloaderPlatformSettings.
    IUniversalDownloaderPlatformSettings platformSettings = _settings as IUniversalDownloaderPlatformSettings;
    _puppeteerEngine = new PuppeteerEngine(headless, _settings.ProxyServerAddress)
    {
        UserAgent = platformSettings?.UserAgent
    };

    return await _puppeteerEngine.GetBrowser();
}
/// <summary>
/// Repeatedly loads the login check address until <see cref="IsLoggedIn"/> reports success.
/// While not logged in, it opens the login page and either runs the automatic login
/// (when credentials were supplied) or waits for the user to log in manually.
/// Once logged in, the browser is restarted if its headless mode differs from the configured one.
/// </summary>
/// <exception cref="Exception">
/// A remote browser is used, the user is not logged in, and no credentials are available for automatic login.
/// </exception>
protected virtual async Task Login()
{
    _logger.Debug("Retrieving browser");
    IWebBrowser browser = await _puppeteerEngine.GetBrowser();

    IWebPage page = null;
    while (true)
    {
        // The automatic login closes its page, so a new one may be needed on each pass.
        bool needsFreshPage = page == null || page.IsClosed;
        if (needsFreshPage)
        {
            page = await browser.NewPageAsync();
        }

        _logger.Debug("Checking login status");
        IWebResponse checkResponse = await page.GoToAsync(_settings.LoginCheckAddress);

        if (await IsLoggedIn(checkResponse))
        {
            _logger.Debug("We are logged in");
            // Return to the configured mode if the browser was restarted for a manual login.
            if (_puppeteerEngine.IsHeadless != _isHeadlessBrowser)
            {
                browser = await RestartBrowser(_isHeadlessBrowser);
                page = await browser.NewPageAsync();
            }

            break;
        }

        _logger.Debug("We are NOT logged in, opening login page");
        if (_isRemoteBrowser && !_shouldTryAutoLogin)
        {
            await page.CloseAsync();
            throw new Exception("You are not logged in into your account in remote browser. Please login and restart application.");
        }

        // A manual login needs a visible window.
        if (_puppeteerEngine.IsHeadless && !_shouldTryAutoLogin)
        {
            _logger.Debug("Puppeteer is in headless mode, restarting in full mode");
            browser = await RestartBrowser(false);
            page = await browser.NewPageAsync();
        }

        //no await is done on purpose because otherwise WaitForRequestAsync misses tons of events sometimes including _settings.LoginCheckAddress load
        #pragma warning disable CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed
        page.GoToAsync(_settings.LoginPageAddress, null);
        #pragma warning restore CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed

        if (!_shouldTryAutoLogin)
        {
            _logger.Debug("Waiting for user to log in Patreon manually");
            await page.WaitForRequestAsync(request => request.Url.Contains(_settings.LoginCheckAddress));
        }
        else
        {
            _logger.Debug("Credentials were supplied, attempting automatic Patreon login");
            await TryAutoLogin(page);
        }
    }

    await page.CloseAsync();
}
/// <summary>
/// Fills in and submits the email and then the password on the login page, and closes the page afterwards.
/// </summary>
/// <param name="page">Page on which navigation to the login address has already been started.</param>
/// <exception cref="Exception">The response to the email step indicates that no account exists for the email.</exception>
private async Task TryAutoLogin(IWebPage page)
{
    // The hanging requests are:
    // https://accounts.google.com/gsi/client
    // https://www.facebook.com/x/oauth/status
    // https://www.google.com/recaptcha/enterprise/webworker.js
    WaitForNetworkIdleOptions idleOptions = new WaitForNetworkIdleOptions
    {
        Concurrency = 3,
        IdleTime = 1000,
        Timeout = 10000
    };

    try
    {
        await page.WaitForNetworkIdleAsync(idleOptions);
    }
    catch (Exception ex) when (ex is WaitTaskTimeoutException || ex is TimeoutException) // In case there are other hanging requests (they seem to appear randomly)
    {
        _logger.Debug("Waiting for network idle timeout; proceed anyway and hope for the best");
    }

    IWebResponse emailStepResponse = await EnterAndSubmit(page, "input[aria-label=\"Email\"]", _settings.LoginEmail);
    string emailStepBody = await emailStepResponse.TextAsync();
    bool accountIsUnknown = emailStepBody.Contains("\"next_auth_step\":\"signup\"");
    if (accountIsUnknown)
    {
        throw new Exception("There does not exist an account with the provided email");
    }

    await EnterAndSubmit(page, "input[aria-label=\"Password\"]", _settings.LoginPassword);

    // Not sure why this is needed, but otherwise GoToAsync will throw PuppeteerSharp.NavigationException: net::ERR_ABORTED
    await page.CloseAsync();
}
/// <summary>
/// Types <paramref name="text"/> into the input matched by <paramref name="selector"/>, waits for the
/// submit button to become enabled (retyping up to five times), clicks it and returns the auth response.
/// </summary>
/// <param name="page">Page showing the login form.</param>
/// <param name="selector">CSS selector of the input field to fill in.</param>
/// <param name="text">Text to type into the field.</param>
/// <returns>The response whose URL contains the configured auth address, when its status is OK.</returns>
/// <exception cref="Exception">
/// The submit button never became enabled, or the auth response status was not OK.
/// </exception>
private async Task<IWebResponse> EnterAndSubmit(IWebPage page, string selector, string text)
{
    const string submitSelector = "button[type=\"submit\"][aria-disabled=\"false\"]";
    const int maxAttempts = 5;

    await page.WaitForSelectorAsync(selector, new WaitForSelectorOptions() { Timeout = 10000 });
    _logger.Debug($"Found {selector}, entering information");
    await Task.Delay(300);

    bool submitEnabled = false;
    for (int attempt = 1; attempt <= maxAttempts && !submitEnabled; attempt++)
    {
        await page.TypeAsync(selector, text, new TypeOptions() { Delay = 50 });
        try
        {
            await page.WaitForSelectorAsync(submitSelector, new WaitForSelectorOptions() { Timeout = 3000 });
            submitEnabled = true;
        }
        catch (Exception ex) when (ex is WaitTaskTimeoutException || ex is TimeoutException)
        {
            // Attempts are reported 1-based so the first failure reads "1/5" rather than "0/5".
            _logger.Debug($"Submit button did not appear; retrying {attempt}/{maxAttempts}");
            await page.ClickAsync(selector, new ClickOptions()
            {
                Count = 3, // hopefully select all text in the field
                Delay = 50,
                OffSet = new Offset(10, 10)
            });
        }
    }

    if (!submitEnabled)
    {
        throw new Exception($"Cannot find the submit button after {maxAttempts} tries");
    }

    await Task.Delay(300);

    // Start listening BEFORE clicking: registering the wait only after the click has completed
    // can miss a response that arrives quickly and then fail with a timeout.
    Task<IWebResponse> authResponseTask = page.WaitForResponseAsync(
        response => response.Url.Contains(_settings.AuthAddress),
        new WaitForOptions() { Timeout = 10000 }
    );
    await page.ClickAsync(submitSelector);
    IWebResponse authResponse = await authResponseTask;

    switch (authResponse.Status)
    {
        case HttpStatusCode.OK:
            return authResponse;
        case HttpStatusCode.BadRequest:
            throw new Exception($"Auth returned non-OK code: {authResponse.Status}; you probably provided an invalid email");
        case HttpStatusCode.TooManyRequests:
            throw new Exception($"Auth returned non-OK code: {authResponse.Status}; you probably tried logging in for too many times");
        case HttpStatusCode.Forbidden:
            throw new Exception($"Auth returned non-OK code: {authResponse.Status}; you either provided a wrong password or are blocked");
        default:
            throw new Exception($"Auth returned non-OK code: {authResponse.Status}");
    }
}
/// <summary>
/// Decides from the login check response whether the session can be treated as logged in.
/// </summary>
/// <param name="response">Response received when loading the login check address.</param>
/// <returns>False when the status is Unauthorized or Forbidden, true for any other status.</returns>
protected virtual Task<bool> IsLoggedIn(IWebResponse response)
{
    HttpStatusCode status = response.Status;
    bool accessDenied = status == HttpStatusCode.Unauthorized || status == HttpStatusCode.Forbidden;
    return Task.FromResult(!accessDenied);
}
/// <summary>
/// Makes sure the user is logged in, then loads the login check address and copies the
/// browser cookies for that page into a <see cref="CookieContainer"/>.
/// </summary>
/// <returns>
/// The populated cookie container, or null when login failed, the browser returned no cookies,
/// or an operation timed out.
/// </returns>
public virtual async Task<CookieContainer> RetrieveCookies()
{
    try
    {
        CookieContainer cookieContainer = new CookieContainer(1000, 100, CookieContainer.DefaultCookieLengthLimit);

        _logger.Debug("Calling login check");
        try
        {
            await Login();
        }
        catch (Exception ex)
        {
            // Exception-first overload: passing the exception after the message makes NLog
            // treat it as a format argument rather than as the logged exception.
            _logger.Fatal(ex, $"Login error: {ex.Message}");
            return null;
        }

        _logger.Debug("Retrieving browser");
        IWebBrowser browser = await _puppeteerEngine.GetBrowser();

        _logger.Debug("Retrieving cookies");
        IWebPage page = await browser.NewPageAsync();
        try
        {
            await page.GoToAsync(_settings.LoginCheckAddress);

            CookieParam[] browserCookies = await page.GetCookiesAsync();
            if (browserCookies == null || browserCookies.Length == 0)
            {
                _logger.Fatal("No cookies were extracted from browser");
                return null;
            }

            foreach (CookieParam browserCookie in browserCookies)
            {
                try
                {
                    _logger.Debug($"Adding cookie: {browserCookie.Name}");
                    // Sanitize cookie value to remove characters that are not valid in .NET cookie values
                    string sanitizedValue = WebUtility.UrlEncode(browserCookie.Value);
                    Cookie cookie = new Cookie(browserCookie.Name, sanitizedValue, browserCookie.Path, browserCookie.Domain);
                    cookieContainer.Add(cookie);
                }
                catch (Exception ex)
                {
                    // A single bad cookie is skipped; the remaining cookies are still collected.
                    _logger.Warn(ex, $"Failed to add cookie {browserCookie.Name}: {ex.Message}.");
                }
            }

            return cookieContainer;
        }
        finally
        {
            // Close the page on every exit path (success, no cookies, exception) so it is not leaked.
            if (!page.IsClosed)
            {
                await page.CloseAsync();
            }
        }
    }
    catch (TimeoutException ex)
    {
        _logger.Fatal($"Internal operation timed out. Exception: {ex}");
        return null;
    }
}
/// <summary>
/// Asks the engine's browser for its user agent string.
/// </summary>
/// <returns>The value returned by the browser's <c>GetUserAgentAsync</c>.</returns>
public async Task<string> GetUserAgent()
{
    return await (await _puppeteerEngine.GetBrowser()).GetUserAgentAsync();
}
/// <summary>
/// Disposes the browser engine if one has been created.
/// </summary>
public void Dispose()
{
    if (_puppeteerEngine != null)
    {
        _puppeteerEngine.Dispose();
    }
}
}
}