Python API代理示例

阅读模式

Python(API代理)

流程:提取 API 链接 → 解析 JSON → 处理错误码 → 使用返回的代理访问目标站。

请将 apiUrl 替换为您自己的 API 提取链接。

requests

1
import requests
2
3
4
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
5
6
def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API.

    Exits the process with a descriptive message on any API failure.
    """
    try:
        resp = requests.get(apiUrl, timeout=10)
    except requests.RequestException as e:
        raise SystemExit(f"API 请求失败: {e}")

    if resp.status_code != 200:
        # Map the documented error codes to their readable messages.
        known_errors = {
            400: "API错误 400: 参数错误",
            403: "API错误 403: 主机IP不在白名单",
            429: "API错误 429: 提取频率过快",
        }
        raise SystemExit(known_errors.get(resp.status_code, f"API错误 {resp.status_code}"))

    payload = resp.json()
    if not isinstance(payload, list) or len(payload) == 0:
        raise SystemExit("API 返回为空或格式异常")
    entry = payload[0]
    ip, port = entry.get("ip"), entry.get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port
30
31
def visit_target_with_proxy(ip, port):
    """Request httpbin.org/ip through the given proxy and print the body."""
    endpoint = f"http://{ip}:{port}"
    proxies = {"http": endpoint, "https": endpoint}
    try:
        r = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        raise SystemExit(f"访问目标站失败: {e}")
    print(r.text)
42
43
if __name__ == "__main__":
    # Fetch a proxy first, then route the target request through it.
    proxy_ip, proxy_port = fetch_proxy_from_api()
    visit_target_with_proxy(proxy_ip, proxy_port)

httpx(同步/异步)

1
import httpx
2
import asyncio
3
4
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
5
6
def fetch_proxy_sync():
    """Synchronously fetch one (ip, port) pair from the extraction API."""
    try:
        r = httpx.get(apiUrl, timeout=10)
    except httpx.HTTPError as e:
        raise SystemExit(f"API 请求失败: {e}")
    if r.status_code != 200:
        # Translate known error codes into readable reasons.
        mapping = {400: "参数错误", 403: "主机IP不在白名单", 429: "提取频率过快"}
        raise SystemExit(f"API错误 {r.status_code}: {mapping.get(r.status_code, '未知错误')}")
    arr = r.json()
    if not arr:
        raise SystemExit("API 返回为空")
    first = arr[0]
    return first["ip"], first["port"]
18
19
async def fetch_proxy_async():
    """Asynchronously fetch one (ip, port) pair from the extraction API.

    Mirrors fetch_proxy_sync(): raises SystemExit on request failure,
    non-200 status, or an empty payload.
    """
    async with httpx.AsyncClient(timeout=10) as client:
        try:
            r = await client.get(apiUrl)
        except httpx.HTTPError as e:
            # Consistent with the sync variant, which wraps transport errors.
            raise SystemExit(f"API 请求失败: {e}")
        if r.status_code != 200:
            raise SystemExit(f"API错误 {r.status_code}")
        arr = r.json()
        # Guard against an empty payload; arr[0] would raise IndexError.
        if not arr:
            raise SystemExit("API 返回为空")
        return arr[0]["ip"], arr[0]["port"]
26
27
def visit_with_httpx(ip, port):
    """Visit httpbin.org/ip through the proxy and print the response body.

    NOTE(review): the `proxies=` keyword was deprecated in httpx 0.26 and
    removed in 0.28; newer versions take `proxy="http://..."` or mounted
    transports — confirm the installed httpx version.
    """
    proxy_url = f"http://{ip}:{port}"
    proxies = {
        "http://": proxy_url,
        "https://": proxy_url,
    }
    try:
        r = httpx.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
        # Surface 4xx/5xx from the target site, like the requests example does.
        r.raise_for_status()
    except httpx.HTTPError as e:
        raise SystemExit(f"访问目标站失败: {e}")
    print(r.text)
34
35
if __name__ == "__main__":
    proxy_ip, proxy_port = fetch_proxy_sync()
    visit_with_httpx(proxy_ip, proxy_port)
    # Async alternative:
    # proxy_ip, proxy_port = asyncio.run(fetch_proxy_async())
    # visit_with_httpx(proxy_ip, proxy_port)

aiohttp

1
import aiohttp
2
import asyncio
3
4
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
5
6
async def main():
    """Fetch a proxy from the API, then visit httpbin.org/ip through it."""
    # Passing a bare int as `timeout` per-request is deprecated in aiohttp;
    # configure a ClientTimeout on the session instead.
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(apiUrl) as resp:
            if resp.status != 200:
                raise SystemExit(f"API错误 {resp.status}")
            arr = await resp.json()
            if not arr:
                raise SystemExit("API 返回为空")
            ip, port = arr[0]["ip"], arr[0]["port"]

        # Reuse the same session for the proxied request instead of
        # opening a second one.
        proxy = f"http://{ip}:{port}"
        async with session.get("https://httpbin.org/ip", proxy=proxy) as r:
            print(await r.text())
20
21
if __name__ == "__main__":
    # Drive the whole async workflow from the script entry point.
    asyncio.run(main())

urllib.request(urllib2)

1
import json
2
from urllib import request, error
3
4
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
5
6
def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API via urllib."""
    try:
        with request.urlopen(apiUrl, timeout=10) as resp:
            if resp.status != 200:
                raise SystemExit(f"API错误 {resp.status}")
            data = json.loads(resp.read().decode("utf-8"))
    except error.URLError as e:
        raise SystemExit(f"API 请求失败: {e}")

    # Reject anything that is not a non-empty JSON array.
    if not isinstance(data, list) or not data:
        raise SystemExit("API 返回为空或格式异常")
    entry = data[0]
    ip, port = entry.get("ip"), entry.get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port
23
24
def visit_with_urllib(ip, port):
    """Open httpbin.org/ip through the proxy using a urllib opener."""
    proxy_url = f"http://{ip}:{port}"
    opener = request.build_opener(
        request.ProxyHandler({"http": proxy_url, "https": proxy_url})
    )
    opener.addheaders = [("User-Agent", "Mozilla/5.0")]
    with opener.open("https://httpbin.org/ip", timeout=10) as r:
        print(r.read().decode("utf-8"))
33
34
if __name__ == "__main__":
    # Fetch a proxy, then browse the target through it.
    proxy_ip, proxy_port = fetch_proxy_from_api()
    visit_with_urllib(proxy_ip, proxy_port)

Python 2(如仍在使用)对应 urllib2

1
# -*- coding: utf-8 -*-
2
import json
3
import urllib2
4
5
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
6
7
def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API (Python 2)."""
    try:
        r = urllib2.urlopen(apiUrl, timeout=10)
        try:
            data = json.loads(r.read())
        finally:
            # urlopen handles are not context managers in old Python 2;
            # close explicitly so the connection is not leaked.
            r.close()
    except urllib2.URLError as e:
        raise SystemExit("API 请求失败: %s" % e)
    # Guard against an empty payload; data[0] would raise IndexError.
    if not data:
        raise SystemExit("API 返回为空")
    ip, port = data[0]["ip"], data[0]["port"]
    return ip, port
12
13
def visit_with_urllib2(ip, port):
14
proxy_handler = urllib2.ProxyHandler({
15
"http": "http://%s:%s" % (ip, port),
16
"https": "http://%s:%s" % (ip, port),
17
})
18
opener = urllib2.build_opener(proxy_handler)
19
opener.addheaders = [("User-Agent", "Mozilla/5.0")]
20
r = opener.open("https://httpbin.org/ip", timeout=10)
21
print r.read()
22
23
if __name__ == "__main__":
    # Fetch a proxy, then browse the target through it.
    proxy_ip, proxy_port = fetch_proxy_from_api()
    visit_with_urllib2(proxy_ip, proxy_port)

Scrapy(Downloader Middleware)

在 Scrapy 中,推荐通过 Downloader Middleware 为请求动态设置代理。

middlewares.py

1
2
import time
3
import json
4
import logging
5
import requests
6
7
logger = logging.getLogger(__name__)
8
9
class ApiProxyMiddleware(object):
    """Downloader middleware that assigns an API-fetched proxy to requests.

    The proxy URL is cached for `cache_ttl_seconds` seconds so the
    extraction API is not hit on every outgoing request.
    """

    def __init__(self, api_url, cache_ttl_seconds=30):
        self.api_url = api_url
        self.cache_ttl_seconds = cache_ttl_seconds
        self._cached_proxy = None
        self._cached_at = 0.0

    @classmethod
    def from_crawler(cls, crawler):
        # Read the API endpoint and cache TTL from the project settings.
        api_url = crawler.settings.get(
            "API_PROXY_URL",
            "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json",
        )
        ttl = crawler.settings.getint("API_PROXY_CACHE_TTL", 30)
        return cls(api_url, ttl)

    def _need_refresh(self):
        # Refresh when the cache is empty or the TTL has elapsed.
        expired = (time.time() - self._cached_at) >= self.cache_ttl_seconds
        return expired or not self._cached_proxy

    def _fetch_proxy(self):
        # Pull a fresh proxy from the extraction API and validate the payload.
        response = requests.get(self.api_url, timeout=10)
        if response.status_code != 200:
            raise RuntimeError("API错误 {}".format(response.status_code))
        entries = response.json()
        if not entries:
            raise RuntimeError("API 返回为空")
        entry = entries[0]
        ip, port = entry.get("ip"), entry.get("port")
        if not ip or not port:
            raise RuntimeError("API 返回缺少 ip/port")
        return "http://{}:{}".format(ip, port)

    def process_request(self, request, spider):
        try:
            if self._need_refresh():
                self._cached_proxy = self._fetch_proxy()
                self._cached_at = time.time()
            request.meta["proxy"] = self._cached_proxy
        except Exception as e:
            logger.warning("设置代理失败: %s", e)
            # Let the request through without a proxy; re-raise here instead
            # if a proxy is mandatory for your project.
            # raise

settings.py

1
2
# Enable Scrapy's built-in HttpProxyMiddleware so request.meta["proxy"] is honored.
HTTPPROXY_ENABLED = True

# Extraction API endpoint and how long (seconds) a fetched proxy stays cached.
API_PROXY_URL = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
API_PROXY_CACHE_TTL = 30

DOWNLOADER_MIDDLEWARES = {
    # Place relative to the default proxy middleware as your project requires.
    'your_project.middlewares.ApiProxyMiddleware': 543,
}

example_spider.py

1
import scrapy
2
3
class HttpBinSpider(scrapy.Spider):
    """Minimal spider that fetches httpbin.org/ip and logs the response body."""

    name = "httpbin"
    start_urls = ["https://httpbin.org/ip"]

    def parse(self, response):
        # The logged body shows which IP the target site observed.
        self.logger.info("响应: %s", response.text)

urllib3

1
import json
2
import urllib3
3
4
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
5
6
def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API via urllib3.

    Raises SystemExit on a non-200 status or an empty/malformed payload.
    """
    http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=5.0, read=10.0))
    r = http.request("GET", apiUrl)
    if r.status != 200:
        raise SystemExit("API错误 {}".format(r.status))
    arr = json.loads(r.data.decode("utf-8"))
    # Guard against an empty/odd payload; arr[0] would raise IndexError.
    if not isinstance(arr, list) or not arr:
        raise SystemExit("API 返回为空或格式异常")
    ip, port = arr[0].get("ip"), arr[0].get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port
14
15
def visit_with_urllib3(ip, port):
    """Fetch httpbin.org/ip through the proxy and print the response body."""
    timeout = urllib3.Timeout(connect=5.0, read=10.0)
    pool = urllib3.ProxyManager(f"http://{ip}:{port}", timeout=timeout)
    resp = pool.request("GET", "https://httpbin.org/ip", headers={"User-Agent": "Mozilla/5.0"})
    print(resp.data.decode("utf-8"))
20
21
if __name__ == "__main__":
    # Fetch a proxy, then browse the target through it.
    proxy_ip, proxy_port = fetch_proxy_from_api()
    visit_with_urllib3(proxy_ip, proxy_port)

Selenium(Chrome 示例)

1
import requests
2
from selenium import webdriver
3
from selenium.webdriver.chrome.options import Options
4
5
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
6
7
def fetch_proxy():
    """Fetch one (ip, port) pair from the extraction API.

    Raises for HTTP errors and exits on an empty payload.
    """
    r = requests.get(apiUrl, timeout=10)
    r.raise_for_status()
    arr = r.json()
    # Guard against an empty payload; arr[0] would raise IndexError.
    if not arr:
        raise SystemExit("API 返回为空")
    return arr[0]["ip"], arr[0]["port"]
12
13
if __name__ == "__main__":
    proxy_ip, proxy_port = fetch_proxy()
    opts = Options()
    # Chrome takes the proxy as a command-line switch.
    opts.add_argument(f"--proxy-server=http://{proxy_ip}:{proxy_port}")
    driver = webdriver.Chrome(options=opts)
    try:
        driver.get("https://httpbin.org/ip")
        print(driver.page_source)
    finally:
        # Always release the browser process.
        driver.quit()

Playwright(同步 API)

1
import requests
2
from playwright.sync_api import sync_playwright
3
4
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
5
6
def fetch_proxy():
    """Fetch one (ip, port) pair from the extraction API.

    Raises for HTTP errors and exits on an empty payload.
    """
    r = requests.get(apiUrl, timeout=10)
    r.raise_for_status()
    arr = r.json()
    # Guard against an empty payload; arr[0] would raise IndexError.
    if not arr:
        raise SystemExit("API 返回为空")
    return arr[0]["ip"], arr[0]["port"]
11
12
if __name__ == "__main__":
    ip, port = fetch_proxy()
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            # Route the whole context through the fetched proxy.
            context = browser.new_context(proxy={"server": f"http://{ip}:{port}"})
            page = context.new_page()
            page.goto("https://httpbin.org/ip")
            print(page.content())
        finally:
            # Close the browser even if navigation fails, so the
            # browser process is not leaked.
            browser.close()

curl_cffi(高性能请求库)

1
from curl_cffi import requests as c_requests
2
3
apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
4
5
def fetch_proxy():
    """Fetch one (ip, port) pair from the extraction API.

    Raises SystemExit on a non-200 status or an empty payload.
    """
    r = c_requests.get(apiUrl, timeout=10)
    if r.status_code != 200:
        raise SystemExit(f"API错误 {r.status_code}")
    arr = r.json()
    # Guard against an empty payload; arr[0] would raise IndexError.
    if not arr:
        raise SystemExit("API 返回为空")
    return arr[0]["ip"], arr[0]["port"]
8
9
if __name__ == "__main__":
    ip, port = fetch_proxy()
    proxy_url = f"http://{ip}:{port}"
    # Both schemes go through the same HTTP proxy endpoint.
    resp = c_requests.get(
        "https://httpbin.org/ip",
        proxies={"http": proxy_url, "https": proxy_url},
        timeout=10,
    )
    print(resp.text)