Python API代理示例

Python(API代理)

流程:提取 API 链接 → 解析 JSON → 处理错误码 → 使用返回的代理访问目标站。

请将 apiUrl 替换为您自己的 API 提取链接。

requests

import requests


apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API.

    Exits the process with a readable Chinese message on network errors,
    known/unknown non-200 status codes, non-JSON bodies, or a malformed
    or empty payload.
    """
    try:
        resp = requests.get(apiUrl, timeout=10)
    except requests.RequestException as e:
        raise SystemExit(f"API 请求失败: {e}")

    if resp.status_code != 200:
        # Known API error codes; anything else falls through to the generic message.
        reasons = {400: "参数错误", 403: "主机IP不在白名单", 429: "提取频率过快"}
        reason = reasons.get(resp.status_code)
        if reason is not None:
            raise SystemExit(f"API错误 {resp.status_code}: {reason}")
        raise SystemExit(f"API错误 {resp.status_code}")

    try:
        data = resp.json()
    except ValueError as e:  # fix: .json() on a non-JSON body previously crashed uncaught
        raise SystemExit(f"API 返回为空或格式异常")

    if not isinstance(data, list) or len(data) == 0:
        raise SystemExit("API 返回为空或格式异常")
    first = data[0]
    if not isinstance(first, dict):  # fix: .get() on a non-dict entry raised AttributeError
        raise SystemExit("API 返回为空或格式异常")
    ip = first.get("ip")
    port = first.get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port

def visit_target_with_proxy(ip, port):
    """Request httpbin.org/ip through the given HTTP proxy and print the body.

    Exits with a readable message if the connection or the request fails.
    """
    proxy_url = f"http://{ip}:{port}"
    proxies = {"http": proxy_url, "https": proxy_url}
    try:
        response = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        raise SystemExit(f"访问目标站失败: {e}")
    print(response.text)

if __name__ == "__main__":
    # Entry point: pull one proxy from the API, then use it for the test request.
    ip, port = fetch_proxy_from_api()
    visit_target_with_proxy(ip, port)

httpx(同步/异步)

import httpx
import asyncio

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy_sync():
    """Synchronously fetch one proxy (ip, port) from the extraction API via httpx.

    Exits with a readable message on network errors, non-200 status codes,
    non-JSON bodies, or empty/malformed payloads.
    """
    try:
        r = httpx.get(apiUrl, timeout=10)
    except httpx.HTTPError as e:
        raise SystemExit(f"API 请求失败: {e}")
    if r.status_code != 200:
        mapping = {400:"参数错误",403:"主机IP不在白名单",429:"提取频率过快"}
        raise SystemExit(f"API错误 {r.status_code}: {mapping.get(r.status_code, '未知错误')}")
    try:
        arr = r.json()
    except ValueError:  # fix: non-JSON body previously crashed uncaught
        raise SystemExit("API 返回为空")
    if not arr:
        raise SystemExit("API 返回为空")
    # fix: use .get() so a malformed entry reports a clean error instead of KeyError
    first = arr[0]
    ip, port = first.get("ip"), first.get("port")
    if not ip or not port:
        raise SystemExit("API 返回为空")
    return ip, port

async def fetch_proxy_async():
    """Asynchronously fetch one proxy (ip, port) via httpx.AsyncClient.

    Exits with a readable message on network errors, non-200 status codes,
    or an empty payload.
    """
    async with httpx.AsyncClient(timeout=10) as client:
        try:
            r = await client.get(apiUrl)
        except httpx.HTTPError as e:  # fix: network failures previously propagated raw
            raise SystemExit(f"API错误 {e}")
        if r.status_code != 200:
            raise SystemExit(f"API错误 {r.status_code}")
        arr = r.json()
        if not arr:  # fix: an empty payload previously crashed with IndexError
            raise SystemExit(f"API错误 {r.status_code}")
        return arr[0]["ip"], arr[0]["port"]

def visit_with_httpx(ip, port):
    """Request httpbin.org/ip through the proxy using httpx and print the body.

    NOTE(review): the `proxies=` keyword was deprecated in httpx 0.26 and
    removed in 0.28. Both schemes used the same proxy URL here, so the single
    `proxy=` argument is equivalent and works on current httpx releases.
    """
    proxy_url = f"http://{ip}:{port}"
    try:
        r = httpx.get("https://httpbin.org/ip", proxy=proxy_url, timeout=10)
        r.raise_for_status()  # consistency: the requests variant also checks status
    except httpx.HTTPError as e:
        raise SystemExit(f"访问目标站失败: {e}")
    print(r.text)

if __name__ == "__main__":
    # Sync flow: fetch a proxy, then hit the target through it.
    ip, port = fetch_proxy_sync()
    visit_with_httpx(ip, port)
    # Or use the async variant:
    # ip, port = asyncio.run(fetch_proxy_async())
    # visit_with_httpx(ip, port)

aiohttp

import aiohttp
import asyncio

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

async def main():
    """Fetch a proxy from the API, then request httpbin.org/ip through it.

    Exits with a readable message on non-200 API status or an empty payload.
    """
    # aiohttp expects ClientTimeout objects; bare numbers on per-request
    # `timeout=` are deprecated in modern aiohttp releases.
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(apiUrl) as resp:
            if resp.status != 200:
                raise SystemExit(f"API错误 {resp.status}")
            arr = await resp.json()
            if not arr:
                raise SystemExit("API 返回为空")
            # fix: .get() avoids an opaque KeyError on a malformed entry
            first = arr[0]
            ip, port = first.get("ip"), first.get("port")
            if not ip or not port:
                raise SystemExit("API 返回为空")

    proxy = f"http://{ip}:{port}"
    # aiohttp takes the proxy per request via `proxy=` (a single URL).
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get("https://httpbin.org/ip", proxy=proxy) as r:
            print(await r.text())

if __name__ == "__main__":
    # Run the whole async flow on a fresh event loop.
    asyncio.run(main())

urllib.request(Python 3 标准库;对应 Python 2 的 urllib2)

import json
from urllib import request, error

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API via urllib.

    Exits with a readable message on network errors, non-200 status codes,
    non-JSON bodies, or an empty/malformed payload. Note urlopen itself
    raises HTTPError (a URLError subclass) for most non-200 responses, so
    the explicit status check is a belt-and-braces guard.
    """
    try:
        with request.urlopen(apiUrl, timeout=10) as resp:
            if resp.status != 200:
                raise SystemExit(f"API错误 {resp.status}")
            body = resp.read().decode("utf-8")
    except error.URLError as e:
        raise SystemExit(f"API 请求失败: {e}")

    try:
        data = json.loads(body)
    except json.JSONDecodeError:  # fix: invalid JSON previously crashed uncaught
        raise SystemExit("API 返回为空或格式异常")

    if not isinstance(data, list) or not data:
        raise SystemExit("API 返回为空或格式异常")
    first = data[0]
    ip = first.get("ip")
    port = first.get("port")
    if not ip or not port:
        raise SystemExit("API 返回缺少 ip/port 字段")
    return ip, port

def visit_with_urllib(ip, port):
    """Open httpbin.org/ip through the given proxy and print the response body."""
    proxy_url = f"http://{ip}:{port}"
    handler = request.ProxyHandler({"http": proxy_url, "https": proxy_url})
    opener = request.build_opener(handler)
    # Some targets reject requests without a browser-like User-Agent.
    opener.addheaders = [("User-Agent", "Mozilla/5.0")]
    with opener.open("https://httpbin.org/ip", timeout=10) as response:
        print(response.read().decode("utf-8"))

if __name__ == "__main__":
    # Entry point: pull one proxy from the API, then use it for the test request.
    ip, port = fetch_proxy_from_api()
    visit_with_urllib(ip, port)

Python 2(如仍在使用)对应 urllib2

# -*- coding: utf-8 -*-
import json
import urllib2

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy_from_api():
    # Fetch one proxy (ip, port) from the extraction API (Python 2 / urllib2).
    # NOTE(review): intentionally minimal legacy example — no error handling;
    # a non-200 response, bad JSON, or empty payload will raise.
    r = urllib2.urlopen(apiUrl, timeout=10)
    data = json.loads(r.read())
    ip, port = data[0]["ip"], data[0]["port"]
    return ip, port

def visit_with_urllib2(ip, port):
    # Request httpbin.org/ip through the proxy and print the body.
    # Python 2 only: note the `print` statement syntax on the last line.
    proxy_handler = urllib2.ProxyHandler({
        "http": "http://%s:%s" % (ip, port),
        "https": "http://%s:%s" % (ip, port),
    })
    opener = urllib2.build_opener(proxy_handler)
    # Browser-like User-Agent to avoid trivial bot blocking.
    opener.addheaders = [("User-Agent", "Mozilla/5.0")]
    r = opener.open("https://httpbin.org/ip", timeout=10)
    print r.read()

if __name__ == "__main__":
    # Entry point: pull one proxy from the API, then use it for the test request.
    ip, port = fetch_proxy_from_api()
    visit_with_urllib2(ip, port)

Scrapy(Downloader Middleware)

在 Scrapy 中,推荐通过 Downloader Middleware 为请求动态设置代理。

middlewares.py


import time
import json
import logging
import requests

logger = logging.getLogger(__name__)

class ApiProxyMiddleware(object):
    """Scrapy downloader middleware that sets ``request.meta['proxy']`` from
    an extraction API, caching the fetched proxy for a configurable TTL.

    NOTE(review): the API call uses blocking ``requests`` on the reactor
    thread; with a reasonable TTL this is usually tolerable, but a
    Twisted-native client would avoid stalling the event loop entirely.
    """

    def __init__(self, api_url, cache_ttl_seconds=30):
        self.api_url = api_url
        self.cache_ttl_seconds = cache_ttl_seconds
        self._cached_proxy = None  # last proxy URL fetched, e.g. "http://ip:port"
        self._cached_at = 0.0      # wall-clock time of the last successful fetch

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from Scrapy settings (URL + cache TTL)."""
        api_url = crawler.settings.get(
            "API_PROXY_URL",
            "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json",
        )
        ttl = crawler.settings.getint("API_PROXY_CACHE_TTL", 30)
        return cls(api_url, ttl)

    def _need_refresh(self):
        """Return True when there is no cached proxy or the cache entry expired."""
        return (time.time() - self._cached_at) >= self.cache_ttl_seconds or not self._cached_proxy

    def _fetch_proxy(self):
        """Call the extraction API once; return a proxy URL or raise RuntimeError."""
        r = requests.get(self.api_url, timeout=10)
        if r.status_code != 200:
            raise RuntimeError("API错误 {}".format(r.status_code))
        arr = r.json()
        if not arr:
            raise RuntimeError("API 返回为空")
        first = arr[0]
        ip, port = first.get("ip"), first.get("port")
        if not ip or not port:
            raise RuntimeError("API 返回缺少 ip/port")
        return "http://{}:{}".format(ip, port)

    def process_request(self, request, spider):
        """Attach a proxy to every outgoing request, refreshing on TTL expiry."""
        try:
            if self._need_refresh():
                self._cached_proxy = self._fetch_proxy()
                self._cached_at = time.time()
        except Exception as e:
            # Fix: on refresh failure keep using the stale cached proxy (if any)
            # instead of silently sending the request without a proxy at all.
            logger.warning("设置代理失败: %s", e)
        if self._cached_proxy:
            request.meta["proxy"] = self._cached_proxy

settings.py


# Enable Scrapy's built-in HTTP proxy support (honours request.meta["proxy"]).
HTTPPROXY_ENABLED = True


# Extraction API endpoint and proxy cache TTL consumed by ApiProxyMiddleware.
API_PROXY_URL = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"
API_PROXY_CACHE_TTL = 30

DOWNLOADER_MIDDLEWARES = {
    # Place before or after the default proxy middleware depending on your project.
    'your_project.middlewares.ApiProxyMiddleware': 543,
}

example_spider.py

import scrapy

class HttpBinSpider(scrapy.Spider):
    """Minimal spider that fetches httpbin.org/ip and logs the response body."""

    name = "httpbin"
    start_urls = ["https://httpbin.org/ip"]

    def parse(self, response):
        # The logged body shows which IP the target saw (i.e. the proxy's IP).
        self.logger.info("响应: %s", response.text)

urllib3

import json
import urllib3

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy_from_api():
    """Fetch one proxy (ip, port) from the extraction API via urllib3.

    Exits with a readable message on non-200 status, non-JSON bodies,
    or an empty payload.
    """
    http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=5.0, read=10.0))
    r = http.request("GET", apiUrl)
    if r.status != 200:
        raise SystemExit("API错误 {}".format(r.status))
    try:
        arr = json.loads(r.data.decode("utf-8"))
    except json.JSONDecodeError:  # fix: invalid JSON previously crashed uncaught
        raise SystemExit("API错误 {}".format(r.status))
    if not arr:  # fix: an empty payload previously crashed with IndexError
        raise SystemExit("API错误 {}".format(r.status))
    ip, port = arr[0]["ip"], arr[0]["port"]
    return ip, port

def visit_with_urllib3(ip, port):
    """Request httpbin.org/ip through the given proxy via urllib3 and print the body."""
    timeout = urllib3.Timeout(connect=5.0, read=10.0)
    manager = urllib3.ProxyManager(f"http://{ip}:{port}", timeout=timeout)
    response = manager.request(
        "GET",
        "https://httpbin.org/ip",
        headers={"User-Agent": "Mozilla/5.0"},
    )
    print(response.data.decode("utf-8"))

if __name__ == "__main__":
    # Entry point: pull one proxy from the API, then use it for the test request.
    ip, port = fetch_proxy_from_api()
    visit_with_urllib3(ip, port)

Selenium(Chrome 示例)

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy():
    """Fetch one proxy (ip, port) from the extraction API.

    raise_for_status covers non-200 responses; an empty payload exits
    with a readable message.
    """
    r = requests.get(apiUrl, timeout=10)
    r.raise_for_status()
    arr = r.json()
    if not arr:  # fix: an empty payload previously crashed with IndexError
        raise SystemExit("API 返回为空")
    return arr[0]["ip"], arr[0]["port"]

if __name__ == "__main__":
    ip, port = fetch_proxy()
    options = Options()
    # Chrome routes all schemes through one proxy via a single --proxy-server flag.
    options.add_argument(f"--proxy-server=http://{ip}:{port}")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get("https://httpbin.org/ip")
        print(driver.page_source)
    finally:
        # Always quit so the browser process is not leaked on errors.
        driver.quit()

Playwright(同步 API)

import requests
from playwright.sync_api import sync_playwright

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy():
    """Fetch one proxy (ip, port) from the extraction API.

    raise_for_status covers non-200 responses; an empty payload exits
    with a readable message.
    """
    r = requests.get(apiUrl, timeout=10)
    r.raise_for_status()
    arr = r.json()
    if not arr:  # fix: an empty payload previously crashed with IndexError
        raise SystemExit("API 返回为空")
    return arr[0]["ip"], arr[0]["port"]

if __name__ == "__main__":
    ip, port = fetch_proxy()
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            # Per-context proxy: every page opened in this context uses it.
            context = browser.new_context(proxy={"server": f"http://{ip}:{port}"})
            page = context.new_page()
            page.goto("https://httpbin.org/ip")
            print(page.content())
        finally:
            # Fix: close the browser even when goto()/content() raises,
            # so no headless Chromium process is leaked.
            browser.close()

curl_cffi(高性能请求库)

from curl_cffi import requests as c_requests

apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json"

def fetch_proxy():
    """Fetch one proxy (ip, port) from the extraction API via curl_cffi."""
    arr = c_requests.get(apiUrl, timeout=10).json()
    if not arr:  # fix: an empty payload previously crashed with IndexError
        raise SystemExit("API 返回为空")
    return arr[0]["ip"], arr[0]["port"]

if __name__ == "__main__":
    ip, port = fetch_proxy()
    proxy_url = f"http://{ip}:{port}"
    # Route both schemes through the same HTTP proxy.
    proxies = {"http": proxy_url, "https": proxy_url}
    response = c_requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
    print(response.text)