diff --git a/newsspider/middlewares.py b/newsspider/middlewares.py
index 105df84..2962913 100644
--- a/newsspider/middlewares.py
+++ b/newsspider/middlewares.py
@@ -9,6 +9,7 @@ from scrapy import signals
 from .myutils import ProxyPool
 from .settings import USERNAME, PASSWORD
 from faker import Faker
+from scrapy.core.downloader.handlers.http11 import TunnelError
 
 
 class ProxyMiddleware:
@@ -32,7 +33,7 @@ class ProxyMiddleware:
 
     def process_response(self, request, response, spider):
         # If the response is normal, return it as-is
-        if response.status in [200, 301, 302, 460, 454]:
+        if response.status in [200, 301, 302]:
             return response
         # If the response is abnormal, handle the failure count
         else:
@@ -44,14 +45,15 @@ class ProxyMiddleware:
 
     def process_exception(self, request, exception, spider):
        # Handle a request that raised an exception
         self._handle_proxy_failure(request.meta['proxy'], spider)
-        spider.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
+        spider.logger.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
         # Reschedule the request
         return request
 
     def _handle_proxy_failure(self, http_proxy, spider):
         # Increment the failure count for the specified proxy
-        proxy = http_proxy.split('@')[-1][:-1]
+        proxy = http_proxy.split('@')[-1][7:]
+        self.proxy_failures[proxy] += 1
         spider.logger.error(f'Proxy {proxy} failed, failure count: {self.proxy_failures[proxy]}')
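
The hunks above add the `TunnelError` import but do not show where it is used. Below is a minimal, hypothetical sketch of one plausible use: special-casing CONNECT-tunnel failures (where the proxy itself is unreachable) inside `process_exception`, rather than counting them like any other download error. The class name and the warning message are illustrative assumptions, not part of this diff.

```python
# Sketch only: assumes TunnelError is meant to be checked in process_exception;
# that usage is not visible in the hunks above.
from scrapy.core.downloader.handlers.http11 import TunnelError


class TunnelAwareProxyMiddleware:
    def process_exception(self, request, exception, spider):
        proxy = request.meta.get('proxy', '')
        if isinstance(exception, TunnelError):
            # The CONNECT tunnel to the proxy could not be established, so the
            # proxy is very likely dead; log it distinctly before rotating.
            spider.logger.warning(f"Tunnel error via {proxy}, rotating proxy")
        else:
            spider.logger.info(f"Changing proxy to {proxy} due to exception: {exception}")
        # Returning the request asks Scrapy to reschedule it through the
        # downloader middlewares again.
        return request
```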