Here are examples of the Java API com.brucezee.jspider.Request, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
50 Examples
19
Source : WebDriverPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Releases a web driver after use: an expired driver is shut down,
 * any other driver is handed back through {@code returnWebDriver}.
 *
 * @param webDriver    the driver being released
 * @param request      the request the driver was used for
 * @param expireMillis expiry threshold forwarded to {@code isWebDriverExpired}
 */
public void shutdownOrReturn(WebDriverEx webDriver, Request request, long expireMillis) {
    boolean expired = isWebDriverExpired(webDriver, expireMillis);
    if (!expired) {
        returnWebDriver(webDriver, request);
        return;
    }
    shutdownWebDriver(webDriver, request);
}
19
Source : WebDriverDownloader.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Builds the placeholder page used when a download could not be completed:
 * the request URL, status code 0, and neither headers nor content.
 *
 * @param request the request that failed
 * @return a page carrying only the URL and a zero status code
 */
private Page processFailedPage(Request request) {
    String url = request.getUrl();
    return new Page(url, 0, null, null);
}
19
Source : NoRepeatScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Asks the configured repeat handler whether the request is a duplicate.
 * Without a repeat handler nothing is ever treated as a duplicate.
 *
 * @param task    the crawl task
 * @param request the request to check
 * @return the handler's verdict, or {@code false} when no handler is set
 */
@Override
public boolean isDuplicate(Task task, Request request) {
    return repeatHandler != null && repeatHandler.isDuplicate(task, request);
}
19
Source : NoRepeatScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Called when polling returned no request; gives the paging request factory
 * (if any) a chance to supply new requests and pushes them into the scheduler.
 *
 * @param task the crawl task
 * @return {@code true} if at least one new request was pushed, {@code false}
 *         if there is no factory, it supplied nothing, or every push was rejected
 */
protected boolean handleEmptyPoll(Task task) {
    if (pagingRequestFactory == null) {
        return false;
    }
    // Acquire the lock BEFORE entering try: if lock() itself fails, the finally
    // block must not call unlock() on a lock this thread never obtained.
    lock.lock();
    try {
        List<Request> requests = pagingRequestFactory.getRequests(task);
        if (requests == null || requests.isEmpty()) {
            return false;
        }
        boolean pushedAny = false;
        for (Request request : requests) {
            // push() is evaluated first so every request is attempted.
            pushedAny = push(task, request) || pushedAny;
        }
        return pushedAny;
    } finally {
        lock.unlock();
    }
}
19
Source : NoRepeatScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Queues a request unless it is a duplicate.
 * A request for which {@code shouldReserved} returns true is queued without
 * consulting or updating the duplicate check; any other request is queued
 * only if it is not a duplicate, and is then registered with the duplicate check.
 *
 * @param task    the crawl task
 * @param request the request to queue
 * @return {@code true} if the request was queued, {@code false} if rejected as duplicate
 */
@Override
public boolean push(Task task, Request request) {
    boolean reserved = shouldReserved(task, request);
    if (!reserved && isDuplicate(task, request)) {
        return false;
    }
    pushWhenNoRepeat(task, request);
    if (!reserved) {
        addRepeatCheck(task, request);
    }
    return true;
}
19
Source : NoRepeatScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Fetches the next request for the task. If none is available,
 * {@code handleEmptyPoll} may add new requests, after which polling is retried once.
 *
 * @param task the crawl task
 * @return the next request, or {@code null} when none could be obtained
 */
@Override
public Request poll(Task task) {
    // First attempt: take whatever is already queued.
    Request next = doPoll(task);
    if (next != null) {
        return next;
    }
    // Queue was empty: re-fill if needed, otherwise report "nothing".
    if (!handleEmptyPoll(task)) {
        return null;
    }
    // New requests were added, so poll again.
    return doPoll(task);
}
19
Source : NoRepeatScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Controls whether a request is added unconditionally.
 * This implementation always returns {@code false}.
 *
 * @param task the crawl task
 * @param request the request
 * @return whether the request must be force-added
 */
protected boolean shouldReserved(Task task, Request request) {
return false;
}
19
Source : HttpClientFactory.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Creates the HTTP request for a crawl request by building the
 * {@code RequestBuilder} from {@code createRequestBuilder} .
 *
 * @param siteConfig site configuration
 * @param request    crawl request
 * @param proxy      proxy host, forwarded unchanged to {@code createRequestBuilder}
 * @return the built request
 */
public HttpUriRequest createHttpUriRequest(SiteConfig siteConfig, Request request, HttpHost proxy) {
    RequestBuilder builder = createRequestBuilder(siteConfig, request, proxy);
    return builder.build();
}
19
Source : HttpClientFactory.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Creates the request configuration by building the builder returned
 * from {@code createRequestConfigBuilder}.
 *
 * @param siteConfig site configuration
 * @param request    crawl request
 * @param proxy      proxy host, forwarded unchanged
 * @return the built request configuration
 */
public RequestConfig createRequestConfig(SiteConfig siteConfig, Request request, HttpHost proxy) {
    RequestConfig.Builder configBuilder = createRequestConfigBuilder(siteConfig, request, proxy);
    return configBuilder.build();
}
18
Source : WebDriverDownloader.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Builds a page from the current state of the web driver: the driver's cookies
 * converted to headers, the page source as content, and a fixed status code 200.
 *
 * @param request   the request that was loaded
 * @param webDriver the driver holding the loaded page
 * @return the resulting page
 */
private Page processPage(Request request, WebDriverEx webDriver) {
    // Cookies are read before the page source, as in the original ordering.
    Header[] cookieHeaders = getHeaderFromCookieSet(webDriver.manage().getCookies());
    String pageSource = webDriver.getPageSource();
    String url = request.getUrl();
    return new Page(url, 200, cookieHeaders, pageSource);
}
18
Source : DefaultWebDriverChooser.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Returns the {@code driverType} field for every request; the request
 * itself is not inspected.
 *
 * @param request the request (unused)
 * @return the value of {@code driverType}
 */
@Override
public DriverType choose(Request request) {
return driverType;
}
18
Source : RedisScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Appends the serialized request to the tail of the task's Redis queue list.
 * The connection obtained from the pool is always closed afterwards.
 *
 * @param task    the crawl task (determines the queue key)
 * @param request the request to enqueue
 */
@Override
protected void pushWhenNoRepeat(Task task, Request request) {
    final Jedis connection = jedisPool.getResource();
    try {
        final String queueKey = RedisKeys.getQueueKey(task);
        final String payload = serializer.serialize(request);
        connection.rpush(queueKey, payload);
    } finally {
        connection.close();
    }
}
18
Source : RequestJsonSerializer.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Serializes a request to its JSON string form via {@code JSON.toJSONString}.
 *
 * @param object the request to serialize
 * @return the JSON text
 */
@Override
public String serialize(Request object) {
    final String json = JSON.toJSONString(object);
    return json;
}
18
Source : NoRepeatScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Forwards the reset to the repeat handler; does nothing when no handler is set.
 *
 * @param task    the crawl task
 * @param request the request whose duplicate record should be reset
 */
@Override
public void resetRequestRepeatCheck(Task task, Request request) {
    if (repeatHandler == null) {
        return;
    }
    repeatHandler.resetRequestRepeatCheck(task, request);
}
18
Source : NoRepeatScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Registers the request with the repeat handler; does nothing when no handler is set.
 *
 * @param task    the crawl task
 * @param request the request to register
 */
@Override
public void addRepeatCheck(Task task, Request request) {
    if (repeatHandler == null) {
        return;
    }
    repeatHandler.addRepeatCheck(task, request);
}
18
Source : UniversalSubPageProcessor.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Matches every request: always returns {@code true} without inspecting it.
 *
 * @param request the request (unused)
 * @return {@code true}
 */
@Override
public boolean isMatch(Request request) {
return true;
}
18
Source : DefaultHttpProxyPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Picks a proxy by delegating to the proxy strategy with the pool's proxy
 * collection; the request itself does not influence the choice here.
 *
 * @param request the request (unused)
 * @return whatever the strategy selects
 */
@Override
public HttpProxy getProxy(Request request) {
    HttpProxy selected = proxyStrategy.getProxy(httpProxies);
    return selected;
}
18
Source : DefaultHttpProxyPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * No-op: this implementation does nothing when a proxy is handed back.
 *
 * @param request    the request (unused)
 * @param statusCode the status code (unused)
 */
@Override
public void returnProxy(Request request, int statusCode) {
}
17
Source : WebDriverPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Puts a web driver back into the queue registered for the driver type that
 * the chooser selects for the request. Nothing happens when no queue is
 * registered for that type.
 *
 * <p>If the thread is interrupted while waiting for queue space, the driver is
 * not returned and the thread's interrupt status is restored so that callers
 * can still observe the interruption.
 *
 * @param webDriver the driver to return
 * @param request   the request the driver was used for
 */
public void returnWebDriver(WebDriverEx webDriver, Request request) {
    DriverType driverType = chooser.choose(request);
    BlockingQueue<WebDriverEx> queue = queueMap.get(driverType);
    if (queue == null) {
        return;
    }
    try {
        queue.put(webDriver);
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the flag; set it again instead of
        // silently swallowing the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}
17
Source : WebDriverPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Shuts a web driver down and, when its queue is a {@code LandlordBlockingQueue},
 * calls {@code resetOne()} on that queue.
 *
 * <p>NOTE(review): when no queue is registered for the chosen driver type the
 * driver is left untouched (not shut down) — confirm this is intended.
 *
 * @param webDriver the driver to shut down
 * @param request   the request used to choose the driver type
 */
public void shutdownWebDriver(WebDriverEx webDriver, Request request) {
    DriverType driverType = chooser.choose(request);
    BlockingQueue<WebDriverEx> queue = queueMap.get(driverType);
    if (queue == null) {
        return;
    }
    webDriver.shutdown();
    if (queue instanceof LandlordBlockingQueue) {
        // Wildcard cast instead of a raw type: resetOne() needs no element type.
        ((LandlordBlockingQueue<?>) queue).resetOne();
    }
}
17
Source : BloomFilterRepeatHandler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Not supported by this handler.
 *
 * @param task    the crawl task
 * @param request the request
 * @throws UnsupportedOperationException always
 */
@Override
public void resetRequestRepeatCheck(Task task, Request request) {
    // Carry a message so the failure is self-explanatory in logs and stack traces.
    throw new UnsupportedOperationException(
            "BloomFilterRepeatHandler does not support resetting the repeat check of a request");
}
17
Source : UrlMatchSubPageProcessor.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Tests whether the whole request URL matches this processor's pattern.
 *
 * @param request the request to test
 * @return {@code true} when the URL matches the pattern in full
 */
@Override
public boolean isMatch(Request request) {
    String url = request.getUrl();
    return pattern.matcher(url).matches();
}
17
Source : CompositePageProcessor.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Delegates processing to the first sub page processor that matches the request.
 *
 * @param request the request that produced the page
 * @param page    the downloaded page
 * @return the result of the first matching processor
 * @throws IllegalArgumentException if no sub page processor matches
 */
@Override
public Result process(Request request, Page page) {
    for (SubPageProcessor candidate : processors) {
        if (!candidate.isMatch(request)) {
            continue;
        }
        return candidate.process(request, page);
    }
    throw new IllegalArgumentException("No sub page processor can process request " + request);
}
17
Source : CompositePipeline.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Hands the result to every sub pipeline that matches the request
 * (all matching pipelines run, not just the first).
 *
 * @param request the request that produced the result
 * @param result  the result to persist
 */
@Override
public void persist(Request request, Result result) {
    for (SubPipeline candidate : pipelines) {
        if (!candidate.isMatch(request)) {
            continue;
        }
        candidate.persist(request, result);
    }
}
17
Source : HttpClientFactory.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Creates a {@code RequestConfig.Builder} populated from the site configuration
 * (timeouts, redirect policy, cookie spec) plus the given proxy.
 * The {@code request} argument is not consulted by this implementation.
 *
 * @param siteConfig site configuration supplying the settings
 * @param request    crawl request (unused)
 * @param proxy      proxy host set on the builder as given
 * @return the populated builder
 */
public RequestConfig.Builder createRequestConfigBuilder(SiteConfig siteConfig, Request request, HttpHost proxy) {
    return RequestConfig.custom()
            .setConnectTimeout(siteConfig.getConnectTimeout())
            .setSocketTimeout(siteConfig.getSocketTimeout())
            .setRedirectsEnabled(siteConfig.isRedirectsEnabled())
            .setConnectionRequestTimeout(siteConfig.getConnectionRequestTimeout())
            .setCircularRedirectsAllowed(siteConfig.isCircularRedirectsAllowed())
            .setMaxRedirects(siteConfig.getMaxRedirects())
            .setCookieSpec(siteConfig.getCookieSpec())
            .setProxy(proxy);
}
17
Source : DefaultHttpClientPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Computes the cache key for an HTTP client: the result of
 * {@code SpiderUrlUtils.getUrlHost} applied to the request URL.
 *
 * @param siteConfig site configuration (unused here)
 * @param request    the request whose URL determines the key
 * @return the cache key
 */
protected String getHttpClientCacheKey(SiteConfig siteConfig, Request request) {
    String url = request.getUrl();
    return SpiderUrlUtils.getUrlHost(url);
}
16
Source : WebDriverDownloader.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Downloads a page with a pooled web driver.
 *
 * <p>Steps: obtain a driver from the pool, load the URL and wait for the
 * response, then build the page from the driver state. A failure to obtain a
 * driver, or to build the page, yields the failed-page placeholder. A failure
 * while loading/waiting is only logged; the page is still built from whatever
 * the driver holds. The driver is always handed back (or shut down) afterwards.
 *
 * @param siteConfig site configuration
 * @param request    the request to download
 * @return the downloaded page, or the failed-page placeholder
 */
@Override
public Page download(SiteConfig siteConfig, Request request) {
    WebDriverEx webDriver = null;
    try {
        webDriver = webDriverPool.getWebDriver(siteConfig, driverConfig, request);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Keep the interruption visible to the caller instead of losing it.
            Thread.currentThread().interrupt();
        }
        // The exception is passed as the trailing argument (no placeholder) so the
        // logger records it as a throwable rather than leaving a stray "{}".
        logger.error("Failed to get web driver from pool, url : {}", request.getUrl(), e);
    }
    if (webDriver == null) {
        return processFailedPage(request);
    }
    try {
        webDriver.get(request.getUrl());
        requestWaiter.waitResponse(siteConfig, request, webDriver);
    } catch (Exception e) {
        // Best effort: fall through and use whatever the driver managed to load.
        logger.error("Failed to request by web driver, url : {}", request.getUrl(), e);
    }
    try {
        return processPage(request, webDriver);
    } catch (Exception e) {
        logger.error("Failed to process page by web driver, url : {}", request.getUrl(), e);
        return processFailedPage(request);
    } finally {
        webDriverPool.shutdownOrReturn(webDriver, request, driverConfig.getExpiresMillis());
    }
}
16
Source : DefaultRequestWaiter.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Waits for the page to load by delegating to the driver's title-and-delay wait,
 * passing no title ({@code null}), the site's socket timeout, and a value of 1000.
 *
 * <p>NOTE(review): the method name was restored from a scrape-corrupted
 * identifier ("waitWithreplacedleAndDelayed"); confirm against {@code WebDriverEx}.
 *
 * @param siteConfig site configuration supplying the socket timeout
 * @param request    the request being loaded (unused)
 * @param webDriver  the driver to wait on
 */
@Override
public void waitResponse(SiteConfig siteConfig, Request request, WebDriverEx webDriver) {
    webDriver.waitWithTitleAndDelayed(null, siteConfig.getSocketTimeout(), 1000);
}
16
Source : ShardedRedisScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Appends the serialized request to the tail of the task's queue list using a
 * sharded Redis connection, which is always closed afterwards.
 *
 * @param task    the crawl task (determines the queue key)
 * @param request the request to enqueue
 */
@Override
protected void pushWhenNoRepeat(Task task, Request request) {
    final ShardedJedis connection = jedisPool.getResource();
    try {
        final String queueKey = RedisKeys.getQueueKey(task);
        final String payload = serializer.serialize(request);
        connection.rpush(queueKey, payload);
    } finally {
        connection.close();
    }
}
16
Source : RedisPriorityScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Stores the serialized request according to its priority: priority 0 goes to
 * the plain list, positive priorities to the "plus" sorted set and negative
 * priorities to the "minus" sorted set (scored by the priority). The request
 * key is then added to the task's set. The connection is always closed.
 *
 * @param task    the crawl task (determines the Redis keys)
 * @param request the request to enqueue
 */
@Override
protected void pushWhenNoRepeat(Task task, Request request) {
    final Jedis connection = jedisPool.getResource();
    try {
        final String payload = serializer.serialize(request);
        final int priority = request.getPriority();
        if (priority > 0) {
            connection.zadd(RedisKeys.getZsetPlusPriorityKey(task), priority, payload);
        } else if (priority < 0) {
            connection.zadd(RedisKeys.getZsetMinusPriorityKey(task), priority, payload);
        } else {
            connection.rpush(RedisKeys.getQueueNoPriorityKey(task), payload);
        }
        connection.sadd(RedisKeys.getSetKey(task), request.key());
    } finally {
        connection.close();
    }
}
16
Source : QueuePriorityScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Takes the next request, trying the queues in order: positive-priority queue,
 * then the no-priority queue, then the negative-priority queue.
 *
 * @param task the crawl task (unused by this implementation)
 * @return the first request found, or {@code null} when all three queues are empty
 */
@Override
public Request doPoll(Task task) {
    Request next = priorityQueuePlus.poll();
    if (next == null) {
        next = noPriorityQueue.poll();
    }
    if (next == null) {
        next = priorityQueueMinus.poll();
    }
    return next;
}
16
Source : HashSetRepeatHandler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Reports whether the request's key is already present in the {@code urls} collection.
 *
 * @param task    the crawl task (unused)
 * @param request the request to check
 * @return {@code true} if the key has been recorded
 */
@Override
public boolean isDuplicate(Task task, Request request) {
    String requestKey = request.key();
    return urls.contains(requestKey);
}
16
Source : BloomFilterRepeatHandler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Records the request's key in the bloom filter.
 *
 * @param task    the crawl task (unused)
 * @param request the request to record
 */
@Override
public void addRepeatCheck(Task task, Request request) {
    String requestKey = request.key();
    bloomFilter.put(requestKey);
}
16
Source : LogPipeline.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Writes the request URL and the result to the logger at debug level.
 *
 * @param request the request that produced the result
 * @param result  the result to log
 */
@Override
public void persist(Request request, Result result) {
    String url = request.getUrl();
    logger.debug("Request: {} result: {}", url, result);
}
16
Source : ConsolePipeline.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Prints the request URL and the result as one line on standard output.
 *
 * @param request the request that produced the result
 * @param result  the result to print
 */
@Override
public void persist(Request request, Result result) {
    String line = "Request: " + request.getUrl() + " result: " + result;
    System.out.println(line);
}
16
Source : MonitorSpiderListener.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Counts one more failed request.
 *
 * @param request the failed request (unused)
 * @param page    the page, if any (unused)
 */
@Override
public void onError(Request request, Page page) {
    // Only the side effect matters; the returned value is discarded.
    errorCount.getAndIncrement();
}
16
Source : HttpClientExecutor.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
* 请求执行器
* Created by brucezee on 2017/1/6.
*/
public clreplaced HttpClientExecutor {
private HttpClientPool httpClientPool;
private HttpProxyPool httpProxyPool;
private CookieStorePool cookieStorePool;
private SiteConfig siteConfig;
private Request request;
public HttpClientExecutor(HttpClientPool httpClientPool, HttpProxyPool httpProxyPool, CookieStorePool cookieStorePool, SiteConfig siteConfig, Request request) {
this.httpClientPool = httpClientPool;
this.httpProxyPool = httpProxyPool;
this.cookieStorePool = cookieStorePool;
this.siteConfig = siteConfig;
this.request = request;
}
public <T> Response<T> execute() {
HttpProxy httpProxy = getHttpProxyFromPool();
CookieStore cookieStore = getCookieStoreFromPool();
CloseableHttpClient httpClient = httpClientPool.getHttpClient(siteConfig, request);
HttpUriRequest httpRequest = httpClientPool.createHttpUriRequest(siteConfig, request, createHttpHost(httpProxy));
CloseableHttpResponse httpResponse = null;
IOException executeException = null;
try {
HttpContext httpContext = createHttpContext(httpProxy, cookieStore);
httpResponse = httpClient.execute(httpRequest, httpContext);
} catch (IOException e) {
executeException = e;
}
Response<T> response = ResponseFactory.createResponse(request.getResponseType(), siteConfig.getCharset(request.getUrl()));
response.handleHttpResponse(httpResponse, executeException);
return response;
}
private HttpProxy getHttpProxyFromPool() {
return httpProxyPool != null ? httpProxyPool.getProxy(request) : null;
}
private CookieStore getCookieStoreFromPool() {
return cookieStorePool != null ? cookieStorePool.getCookieStore(request) : null;
}
private HttpHost createHttpHost(HttpProxy httpProxy) {
return httpProxy != null ? new HttpHost(httpProxy.getHost(), httpProxy.getPort()) : null;
}
protected HttpContext createHttpContext(HttpProxy httpProxy, CookieStore cookieStore) {
HttpContext httpContext = new HttpClientContext();
if (cookieStore != null) {
httpContext.setAttribute(HttpClientContext.COOKIE_STORE, cookieStore);
}
if (httpProxy != null && StringUtils.isNotBlank(httpProxy.getUsername())) {
CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
credentialsProvider.setCredentials(new AuthScope(httpProxy.getHost(), httpProxy.getPort()), new UsernamePreplacedwordCredentials(httpProxy.getUsername(), httpProxy.getPreplacedword()));
httpContext.setAttribute(HttpClientContext.CREDS_PROVIDER, credentialsProvider);
}
return httpContext;
}
}
16
Source : DefaultHttpClientPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Creates the HTTP request by delegating to the factory.
 *
 * @param siteConfig site configuration
 * @param request    crawl request
 * @param proxy      proxy host, forwarded unchanged
 * @return the request created by the factory
 */
@Override
public HttpUriRequest createHttpUriRequest(SiteConfig siteConfig, Request request, HttpHost proxy) {
    HttpUriRequest httpRequest = factory.createHttpUriRequest(siteConfig, request, proxy);
    return httpRequest;
}
16
Source : BdbPersistentScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Adds the request to the queue and bumps the counter; no duplicate check is
 * performed in this method.
 *
 * @param task    the crawl task (unused)
 * @param request the request to enqueue
 * @return always {@code true}
 */
@Override
public boolean push(Task task, Request request) {
    queue.add(request);
    // Counter is only advanced once the add has completed without throwing.
    count.getAndIncrement();
    return true;
}
15
Source : QueueScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Adds the request to the queue and bumps the counter.
 *
 * @param task    the crawl task (unused)
 * @param request the request to enqueue
 */
@Override
public void pushWhenNoRepeat(Task task, Request request) {
    queue.add(request);
    // Counter is only advanced once the add has completed without throwing.
    count.getAndIncrement();
}
15
Source : HashSetRepeatHandler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Records the request's key in the {@code urls} collection.
 *
 * @param task    the crawl task (unused)
 * @param request the request to record
 */
@Override
public void addRepeatCheck(Task task, Request request) {
    String requestKey = request.key();
    urls.add(requestKey);
}
15
Source : HashSetRepeatHandler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Removes the request's key from the {@code urls} collection so the request
 * is no longer reported as a duplicate.
 *
 * @param task    the crawl task (unused)
 * @param request the request to forget
 */
@Override
public void resetRequestRepeatCheck(Task task, Request request) {
    String requestKey = request.key();
    urls.remove(requestKey);
}
15
Source : BloomFilterRepeatHandler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Reports whether the bloom filter might already contain the request's key.
 *
 * @param task    the crawl task (unused)
 * @param request the request to check
 * @return the result of {@code bloomFilter.mightContain} for the request key
 */
@Override
public boolean isDuplicate(Task task, Request request) {
    String requestKey = request.key();
    return bloomFilter.mightContain(requestKey);
}
15
Source : MonitorSpiderListener.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Counts one more successful request.
 *
 * @param request the request (unused)
 * @param page    the downloaded page (unused)
 * @param result  the processing result (unused)
 */
@Override
public void onSuccess(Request request, Page page, Result result) {
    // Only the side effect matters; the returned value is discarded.
    successCount.getAndIncrement();
}
14
Source : ShardedRedisPriorityScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Stores the serialized request according to its priority using a sharded
 * Redis connection: priority 0 goes to the plain list, positive priorities to
 * the "plus" sorted set and negative priorities to the "minus" sorted set
 * (scored by the priority). The request key is then added to the task's set.
 * The connection is always closed.
 *
 * @param task    the crawl task (determines the Redis keys)
 * @param request the request to enqueue
 */
@Override
protected void pushWhenNoRepeat(Task task, Request request) {
    final ShardedJedis connection = jedisPool.getResource();
    try {
        final String payload = serializer.serialize(request);
        final int priority = request.getPriority();
        if (priority > 0) {
            connection.zadd(RedisKeys.getZsetPlusPriorityKey(task), priority, payload);
        } else if (priority < 0) {
            connection.zadd(RedisKeys.getZsetMinusPriorityKey(task), priority, payload);
        } else {
            connection.rpush(RedisKeys.getQueueNoPriorityKey(task), payload);
        }
        connection.sadd(RedisKeys.getSetKey(task), request.key());
    } finally {
        connection.close();
    }
}
14
Source : QueuePriorityScheduler.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Adds the request to the queue that corresponds to its priority
 * (positive, negative, or zero) and bumps the counter.
 *
 * @param task    the crawl task (unused)
 * @param request the request to enqueue
 */
@Override
public void pushWhenNoRepeat(Task task, Request request) {
    final int level = request.getPriority();
    if (level > 0) {
        priorityQueuePlus.add(request);
    } else if (level < 0) {
        priorityQueueMinus.add(request);
    } else {
        noPriorityQueue.add(request);
    }
    count.incrementAndGet();
}
14
Source : DefaultHttpClientPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Returns the HTTP client cached under the request's cache key, creating and
 * caching one through the factory when none exists yet. Creation happens
 * inside a block synchronized on this pool, after a second lookup.
 *
 * @param siteConfig site configuration used to create a new client
 * @param request    the request that determines the cache key
 * @return the cached or newly created client
 */
@Override
public CloseableHttpClient getHttpClient(SiteConfig siteConfig, Request request) {
    final String cacheKey = getHttpClientCacheKey(siteConfig, request);
    CloseableHttpClient client = httpClients.get(cacheKey);
    if (client != null) {
        return client;
    }
    synchronized (this) {
        // Look again under the monitor: another thread may have created it meanwhile.
        client = httpClients.get(cacheKey);
        if (client == null) {
            client = factory.createHttpClient(siteConfig);
            httpClients.put(cacheKey, client);
        }
        return client;
    }
}
13
Source : WebDriverPool.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Obtains a web driver for the request from the queue of the chosen driver type.
 *
 * <p>The queue is created on first use (under {@code lock}). An immediately
 * available driver is returned directly; otherwise a new driver is created and
 * queued when the queue reports it needs more, and the method then waits up to
 * the site's connection-request timeout (milliseconds) for a driver.
 *
 * @param siteConfig   site configuration (driver creation, wait timeout)
 * @param driverConfig driver configuration passed to the factory
 * @param request      the request used to choose the driver type
 * @return a driver, or {@code null} if none became available within the timeout
 * @throws IOException          declared by the original signature
 * @throws InterruptedException if interrupted while locking or waiting
 */
public WebDriverEx getWebDriver(SiteConfig siteConfig, DriverConfig driverConfig, Request request) throws IOException, InterruptedException {
    final DriverType driverType = chooser.choose(request);
    LandlordBlockingQueue<WebDriverEx> pool = queueMap.get(driverType);
    if (pool == null) {
        lock.lockInterruptibly();
        try {
            // Re-read under the lock so only one queue is ever registered per type.
            pool = queueMap.get(driverType);
            if (pool == null) {
                pool = new LandlordBlockingQueue<WebDriverEx>(capacity);
                queueMap.put(driverType, pool);
            }
        } finally {
            lock.unlock();
        }
    }
    WebDriverEx driver = pool.poll();
    if (driver == null) {
        if (pool.isNeedMore()) {
            pool.add(factory.createWebDriver(siteConfig, driverConfig, driverType));
        }
        driver = pool.poll(siteConfig.getConnectionRequestTimeout(), TimeUnit.MILLISECONDS);
    }
    return driver;
}
13
Source : HttpClientDownloader.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Downloads a page through the HTTP client executor and wraps the outcome
 * (status code, headers, result) in a {@code Page}. Exceptions and
 * unsuccessful responses are logged, not thrown; a page is returned either way.
 *
 * @param siteConfig site configuration
 * @param request    the request to download
 * @return a page built from the response, whatever its outcome
 */
@Override
public Page download(SiteConfig siteConfig, Request request) {
    HttpClientExecutor executor = new HttpClientExecutor(httpClientPool, httpProxyPool, cookieStorePool, siteConfig, request);
    // Wildcard instead of a raw type: the result type is not needed here.
    Response<?> response = executor.execute();
    if (response.isException()) {
        logger.error("download exception, url : {} {}", request.getUrl(), response.getException().getMessage());
    } else if (!response.isSuccess()) {
        logger.error("download failed, url : {}", request.getUrl());
    }
    return new Page(request.getUrl(), response.getStatusCode(), response.getHeaders(), response.getResult());
}
5
Source : HttpClientFactory.java
with MIT License
from brucezee
with MIT License
from brucezee
/**
 * Creates a {@code RequestBuilder} for a crawl request: method, request config,
 * charset, URI, entity, parameters, and headers. Site-level headers and
 * request-level headers are merged, with request headers overriding site
 * headers of the same name.
 *
 * <p>The merge is done on a copy, so the map returned by
 * {@code siteConfig.getHeaders()} is never modified.
 *
 * <p>NOTE(review): {@code setEntity}/{@code entity()} were restored from
 * scrape-corrupted identifiers; confirm against {@code Request}.
 *
 * @param siteConfig site configuration (charset, default headers)
 * @param request    crawl request (method, URL, entity, parameters, headers)
 * @param proxy      proxy host forwarded to {@code createRequestConfig}
 * @return the populated request builder
 */
public RequestBuilder createRequestBuilder(SiteConfig siteConfig, Request request, HttpHost proxy) {
    RequestConfig requestConfig = createRequestConfig(siteConfig, request, proxy);
    RequestBuilder requestBuilder = RequestBuilder.create(request.getMethod());
    requestBuilder.setConfig(requestConfig);
    requestBuilder.setCharset(getDefaultCharset(siteConfig.getCharset(request.getUrl())));
    requestBuilder.setUri(request.getUrl());
    requestBuilder.setEntity(request.entity());

    Map<String, String> parameters = request.getParameters();
    if (parameters != null && !parameters.isEmpty()) {
        for (Map.Entry<String, String> entry : parameters.entrySet()) {
            requestBuilder.addParameter(entry.getKey(), StringUtils.defaultString(entry.getValue()));
        }
    }

    Map<String, String> siteHeaders = siteConfig.getHeaders();
    Map<String, String> requestHeaders = request.getHeaders();
    Map<String, String> mergedHeaders;
    if (siteHeaders != null && requestHeaders != null) {
        // Copy first: calling putAll on siteHeaders itself would permanently add
        // this request's headers to the shared site configuration.
        mergedHeaders = new java.util.LinkedHashMap<String, String>(siteHeaders);
        mergedHeaders.putAll(requestHeaders);
    } else {
        mergedHeaders = siteHeaders != null ? siteHeaders : requestHeaders;
    }
    if (mergedHeaders != null && !mergedHeaders.isEmpty()) {
        for (Map.Entry<String, String> entry : mergedHeaders.entrySet()) {
            requestBuilder.addHeader(entry.getKey(), StringUtils.defaultString(entry.getValue()));
        }
    }
    return requestBuilder;
}