Python(API代理)
流程:提取 API 链接 → 解析 JSON → 处理错误码 → 使用返回的代理访问目标站。
请将 apiUrl 替换为您自己的 API 提取链接。
requests
import requests
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API.

    Exits the process with a descriptive message on network failure,
    known HTTP error codes, or a malformed/empty API response.
    """
    # Known API error codes mapped to their full error messages.
    error_hints = {
        400: "API错误 400: 参数错误",
        403: "API错误 403: 主机IP不在白名单",
        429: "API错误 429: 提取频率过快",
    }
    try:
        resp = requests.get(apiUrl, timeout=10)
    except requests.RequestException as e:
        raise SystemExit(f"API 请求失败: {e}")
    if resp.status_code != 200:
        raise SystemExit(error_hints.get(resp.status_code, f"API错误 {resp.status_code}"))
    payload = resp.json()
    if not isinstance(payload, list) or not payload:
        raise SystemExit("API 返回为空或格式异常")
    entry = payload[0]
    ip, port = entry.get("ip"), entry.get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port
def visit_target_with_proxy(ip, port):
    """Request httpbin.org/ip through the given proxy and print the body."""
    proxy_url = f"http://{ip}:{port}"
    proxies = {"http": proxy_url, "https": proxy_url}
    try:
        response = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        raise SystemExit(f"访问目标站失败: {e}")
    print(response.text)
# Entry point: pull a proxy from the API, then use it to reach the target site.
if __name__ == "__main__":
    ip, port = fetch_proxy_from_api()
    visit_target_with_proxy(ip, port)
httpx(同步/异步)
import httpx
import asyncio
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
def fetch_proxy_sync():
    """Synchronously fetch one (ip, port) proxy from the extraction API."""
    try:
        resp = httpx.get(apiUrl, timeout=10)
    except httpx.HTTPError as e:
        raise SystemExit(f"API 请求失败: {e}")
    if resp.status_code != 200:
        # Translate known error codes; anything else reports as "未知错误".
        mapping = {400:"参数错误",403:"主机IP不在白名单",429:"提取频率过快"}
        raise SystemExit(f"API错误 {resp.status_code}: {mapping.get(resp.status_code, '未知错误')}")
    items = resp.json()
    if not items:
        raise SystemExit("API 返回为空")
    first = items[0]
    return first["ip"], first["port"]
async def fetch_proxy_async():
    """Asynchronously fetch one (ip, port) proxy from the extraction API.

    Mirrors fetch_proxy_sync: exits with a clear message on network errors,
    non-200 responses, or an empty extraction result, instead of letting an
    opaque IndexError propagate.
    """
    async with httpx.AsyncClient(timeout=10) as client:
        try:
            r = await client.get(apiUrl)
        except httpx.HTTPError as e:  # connect error, timeout, protocol error
            raise SystemExit(f"API 请求失败: {e}")
        if r.status_code != 200:
            raise SystemExit(f"API错误 {r.status_code}")
        arr = r.json()
        if not arr:  # empty list would otherwise raise IndexError below
            raise SystemExit("API 返回为空")
        return arr[0]["ip"], arr[0]["port"]
def visit_with_httpx(ip, port):
    """Request httpbin.org/ip through the proxy and print the body.

    NOTE: the dict-based ``proxies=`` argument was deprecated in httpx 0.26
    and removed in 0.28. A single ``proxy=`` URL is the supported replacement
    and covers both http and https traffic (the original mapped both schemes
    to the same proxy anyway).
    """
    proxy_url = f"http://{ip}:{port}"
    r = httpx.get("https://httpbin.org/ip", proxy=proxy_url, timeout=10)
    print(r.text)
# Entry point: fetch a proxy synchronously, then visit the target through it.
if __name__ == "__main__":
    ip, port = fetch_proxy_sync()
    visit_with_httpx(ip, port)
    # Or use the async variant:
    # ip, port = asyncio.run(fetch_proxy_async())
    # visit_with_httpx(ip, port)
aiohttp
import aiohttp
import asyncio
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
async def main():
    """Fetch a proxy from the API, then request httpbin.org/ip through it.

    Fixes: aiohttp expects a ``ClientTimeout`` object for the timeout —
    passing a bare number is deprecated. Also reuses a single session for
    both requests instead of creating two.
    """
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(apiUrl) as resp:
            if resp.status != 200:
                raise SystemExit(f"API错误 {resp.status}")
            arr = await resp.json()
            if not arr:
                raise SystemExit("API 返回为空")
            ip, port = arr[0]["ip"], arr[0]["port"]
        proxy = f"http://{ip}:{port}"
        # Second request goes out via the extracted proxy.
        async with session.get("https://httpbin.org/ip", proxy=proxy) as r:
            print(await r.text())
# Entry point: run the full async fetch-and-visit flow.
if __name__ == "__main__":
    asyncio.run(main())
urllib.request(Python 3 标准库,对应 Python 2 的 urllib2)
import json
from urllib import request, error
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
def fetch_proxy_from_api():
    """Fetch one (ip, port) proxy from the extraction API via urllib."""
    try:
        with request.urlopen(apiUrl, timeout=10) as resp:
            if resp.status != 200:
                raise SystemExit(f"API错误 {resp.status}")
            payload = json.loads(resp.read().decode("utf-8"))
    except error.URLError as e:
        raise SystemExit(f"API 请求失败: {e}")
    if not (isinstance(payload, list) and payload):
        raise SystemExit("API 返回为空或格式异常")
    entry = payload[0]
    ip, port = entry.get("ip"), entry.get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port
def visit_with_urllib(ip, port):
    """Open httpbin.org/ip through the proxy and print the decoded body."""
    proxy_url = f"http://{ip}:{port}"
    handler = request.ProxyHandler({"http": proxy_url, "https": proxy_url})
    opener = request.build_opener(handler)
    opener.addheaders = [("User-Agent", "Mozilla/5.0")]
    with opener.open("https://httpbin.org/ip", timeout=10) as r:
        print(r.read().decode("utf-8"))
# Entry point: pull a proxy from the API, then use it for the request.
if __name__ == "__main__":
    ip, port = fetch_proxy_from_api()
    visit_with_urllib(ip, port)
Python 2(如仍在使用)对应
urllib2:# -*- coding: utf-8 -*- import json import urllib2 apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json" def fetch_proxy_from_api(): r = urllib2.urlopen(apiUrl, timeout=10) data = json.loads(r.read()) ip, port = data[0]["ip"], data[0]["port"] return ip, port def visit_with_urllib2(ip, port): proxy_handler = urllib2.ProxyHandler({ "http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port), }) opener = urllib2.build_opener(proxy_handler) opener.addheaders = [("User-Agent", "Mozilla/5.0")] r = opener.open("https://httpbin.org/ip", timeout=10) print r.read() if __name__ == "__main__": ip, port = fetch_proxy_from_api() visit_with_urllib2(ip, port)
Scrapy(Downloader Middleware)
在 Scrapy 中,推荐通过 Downloader Middleware 为请求动态设置代理。
middlewares.py:
import time
import json
import logging
import requests
logger = logging.getLogger(__name__)


class ApiProxyMiddleware(object):
    """Scrapy downloader middleware that assigns an API-fetched proxy.

    The extracted proxy is cached and only refreshed once its TTL expires,
    so the extraction API is not hit on every outgoing request.
    """

    def __init__(self, api_url, cache_ttl_seconds=30):
        self.api_url = api_url
        self.cache_ttl_seconds = cache_ttl_seconds
        self._cached_proxy = None
        self._cached_at = 0.0

    @classmethod
    def from_crawler(cls, crawler):
        # Read the API URL and cache TTL from the project settings.
        api_url = crawler.settings.get(
            "API_PROXY_URL",
            "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json",
        )
        ttl = crawler.settings.getint("API_PROXY_CACHE_TTL", 30)
        return cls(api_url, ttl)

    def _need_refresh(self):
        # Refresh when nothing is cached yet, or the cached entry is stale.
        if not self._cached_proxy:
            return True
        return (time.time() - self._cached_at) >= self.cache_ttl_seconds

    def _fetch_proxy(self):
        # One call to the extraction API; returns a proxy URL string.
        resp = requests.get(self.api_url, timeout=10)
        if resp.status_code != 200:
            raise RuntimeError("API错误 {}".format(resp.status_code))
        entries = resp.json()
        if not entries:
            raise RuntimeError("API 返回为空")
        ip = entries[0].get("ip")
        port = entries[0].get("port")
        if not ip or not port:
            raise RuntimeError("API 返回缺少 ip/port")
        return "http://{}:{}".format(ip, port)

    def process_request(self, request, spider):
        try:
            if self._need_refresh():
                self._cached_proxy = self._fetch_proxy()
                self._cached_at = time.time()
            request.meta["proxy"] = self._cached_proxy
        except Exception as e:
            logger.warning("设置代理失败: %s", e)
            # Let the request through without a proxy, or re-raise if preferred.
            # raise
settings.py:
# settings.py — Scrapy project settings for the API proxy middleware.
HTTPPROXY_ENABLED = True
# Replace with your own extraction API URL.
API_PROXY_URL = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
# Seconds a fetched proxy is reused before re-extracting.
API_PROXY_CACHE_TTL = 30
DOWNLOADER_MIDDLEWARES = {
    # Place before or after the default proxy middleware as your project requires.
    'your_project.middlewares.ApiProxyMiddleware': 543,
}
example_spider.py:
import scrapy
class HttpBinSpider(scrapy.Spider):
    """Minimal spider that fetches httpbin.org/ip to verify the proxy works."""
    name = "httpbin"
    start_urls = ["https://httpbin.org/ip"]

    def parse(self, response):
        # httpbin echoes the origin IP, so the log shows which proxy was used.
        self.logger.info("响应: %s", response.text)
urllib3
import json
import urllib3
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
def fetch_proxy_from_api():
    """Fetch one (ip, port) proxy from the extraction API via urllib3.

    Validates the response like the other client examples in this document:
    exits with a clear message on a non-200 status, an empty/malformed list,
    or missing ip/port fields, instead of raising an opaque
    IndexError/KeyError.
    """
    http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=5.0, read=10.0))
    r = http.request("GET", apiUrl)
    if r.status != 200:
        raise SystemExit("API错误 {}".format(r.status))
    arr = json.loads(r.data.decode("utf-8"))
    if not isinstance(arr, list) or not arr:
        raise SystemExit("API 返回为空或格式异常")
    ip = arr[0].get("ip")
    port = arr[0].get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port
def visit_with_urllib3(ip, port):
    """Request httpbin.org/ip through the proxy and print the decoded body."""
    timeout = urllib3.Timeout(connect=5.0, read=10.0)
    pool = urllib3.ProxyManager(f"http://{ip}:{port}", timeout=timeout)
    resp = pool.request("GET", "https://httpbin.org/ip", headers={"User-Agent": "Mozilla/5.0"})
    print(resp.data.decode("utf-8"))
# Entry point: pull a proxy from the API, then use it for the request.
if __name__ == "__main__":
    ip, port = fetch_proxy_from_api()
    visit_with_urllib3(ip, port)
Selenium(Chrome 示例)
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
def fetch_proxy():
    """Fetch one (ip, port) proxy from the extraction API.

    Raises for HTTP error statuses via raise_for_status, and exits with a
    clear message when the API returns an empty list (instead of letting an
    opaque IndexError propagate), matching the other examples.
    """
    r = requests.get(apiUrl, timeout=10)
    r.raise_for_status()
    arr = r.json()
    if not arr:
        raise SystemExit("API 返回为空")
    return arr[0]["ip"], arr[0]["port"]
# Entry point: fetch a proxy, start Chrome routed through it, show httpbin's view.
if __name__ == "__main__":
    ip, port = fetch_proxy()
    options = Options()
    # Route all Chrome traffic through the extracted proxy.
    options.add_argument(f"--proxy-server=http://{ip}:{port}")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get("https://httpbin.org/ip")
        print(driver.page_source)
    finally:
        # Always release the browser, even if the navigation fails.
        driver.quit()
Playwright(同步 API)
import requests
from playwright.sync_api import sync_playwright
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
def fetch_proxy():
    """Fetch one (ip, port) proxy from the extraction API."""
    resp = requests.get(apiUrl, timeout=10)
    resp.raise_for_status()
    entries = resp.json()
    first = entries[0]
    return first["ip"], first["port"]
# Entry point: fetch a proxy and browse through it with Playwright.
if __name__ == "__main__":
    ip, port = fetch_proxy()
    with sync_playwright() as p:
        browser = p.chromium.launch()
        # Apply the proxy at the context level so every page uses it.
        context = browser.new_context(proxy={"server": f"http://{ip}:{port}"})
        page = context.new_page()
        page.goto("https://httpbin.org/ip")
        print(page.content())
        browser.close()
curl_cffi(高性能请求库)
from curl_cffi import requests as c_requests
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
def fetch_proxy():
    """Fetch one (ip, port) proxy from the extraction API via curl_cffi."""
    entries = c_requests.get(apiUrl, timeout=10).json()
    first = entries[0]
    return first["ip"], first["port"]
# Entry point: fetch a proxy, then request httpbin.org/ip through it.
if __name__ == "__main__":
    ip, port = fetch_proxy()
    proxies = {
        "http": f"http://{ip}:{port}",
        "https": f"http://{ip}:{port}",
    }
    r = c_requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
    print(r.text)