fix proxy get

This commit is contained in:
cooper 2024-06-18 22:49:42 +08:00
parent 3fae0091b3
commit 8b156e18ed

View File

@ -9,6 +9,7 @@ from scrapy import signals
from .myutils import ProxyPool
from .settings import USERNAME, PASSWORD
from faker import Faker
from scrapy.core.downloader.handlers.http11 import TunnelError
class ProxyMiddleware:
@ -32,7 +33,7 @@ class ProxyMiddleware:
def process_response(self, request, response, spider):
# 如果响应正常,返回响应
if response.status in [200, 301, 302, 460, 454]:
if response.status in [200, 301, 302]:
return response
# 如果响应异常,处理失败计数
else:
@ -44,14 +45,15 @@ class ProxyMiddleware:
def process_exception(self, request, exception, spider):
# Handle a request whose download raised an exception: record a failure for
# the proxy stored in request.meta['proxy'], log the event, and hand the same
# request object back to the caller.
self._handle_proxy_failure(request.meta['proxy'], spider)
# NOTE(review): the next two statements differ only in `spider.info` vs
# `spider.logger.info`. This text looks like a diff hunk whose +/- markers
# were stripped, so they are presumably the removed and the added version of
# a single line -- confirm that only the `spider.logger.info` call exists in
# the real file.
spider.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
spider.logger.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
# NOTE(review): request.meta['proxy'] is not reassigned anywhere in this
# method, so the "Changing proxy to" message reports the proxy that was just
# counted as failed, not a new one -- confirm where the proxy is swapped.
# Return the request so that it is scheduled again (per the original comment).
return request
def _handle_proxy_failure(self, http_proxy, spider):
# 增加指定代理的失败计数
proxy = http_proxy.split('@')[-1][:-1]
proxy = http_proxy.split('@')[-1][7:]
self.proxy_failures[proxy] += 1
spider.logger.error(f'Proxy {proxy} failed, failure count: {self.proxy_failures[proxy]}')