fix proxy get

cooper 2024-06-18 22:49:42 +08:00
parent 3fae0091b3
commit 8b156e18ed


@@ -9,6 +9,7 @@ from scrapy import signals
 from .myutils import ProxyPool
 from .settings import USERNAME, PASSWORD
 from faker import Faker
+from scrapy.core.downloader.handlers.http11 import TunnelError
 
 class ProxyMiddleware:
@@ -32,7 +33,7 @@ class ProxyMiddleware:
     def process_response(self, request, response, spider):
         # If the response is normal, return it
-        if response.status in [200, 301, 302, 460, 454]:
+        if response.status in [200, 301, 302]:
             return response
         # If the response is abnormal, update the failure count
         else:
@@ -44,14 +45,15 @@ class ProxyMiddleware:
     def process_exception(self, request, exception, spider):
         # Handle a request that raised an exception
         self._handle_proxy_failure(request.meta['proxy'], spider)
-        spider.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
+        spider.logger.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
         # Reschedule the request
         return request
 
     def _handle_proxy_failure(self, http_proxy, spider):
         # Increment the failure count for the given proxy
-        proxy = http_proxy.split('@')[-1][:-1]
+        proxy = http_proxy.split('@')[-1][7:]
         self.proxy_failures[proxy] += 1
         spider.logger.error(f'Proxy {proxy} failed, failure count: {self.proxy_failures[proxy]}')
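
The new slice in _handle_proxy_failure only works if the value stored in request.meta['proxy'] carries no inline credentials (for example because USERNAME and PASSWORD are sent via a Proxy-Authorization header instead), so the string looks like http://host:port. A minimal sketch of what the parsing does under that assumption, with a hypothetical proxy address:

# Minimal sketch, assuming request.meta['proxy'] has no inline
# credentials, i.e. it looks like 'http://host:port'.
http_proxy = 'http://12.34.56.78:8888'  # hypothetical address

# With no 'user:pass@' part, split('@')[-1] returns the string
# unchanged; [7:] then strips the 7-character 'http://' prefix.
proxy = http_proxy.split('@')[-1][7:]
print(proxy)  # 12.34.56.78:8888

The previous [:-1] slice only dropped the last character (presumably a trailing slash), so it left the 'http://' scheme in the key used for self.proxy_failures; the new slice keeps the counter keyed on the bare host:port.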