Java API代理示例

流程:调用 API → 解析 JSON → 处理错误 → 使用返回代理访问目标站。

[!NOTE]提示

下述示例均采用 IP 白名单模式(无需用户名密码认证)。示例代码使用 org.json 解析 API 返回的 JSON,生产环境可换用 Jackson/Gson 等解析库。

HttpClient 4.x

import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Apache HttpClient 4.x demo: calls the proxy-extraction API, parses the JSON array it
 * returns, and then requests a target page through the first proxy in that array.
 */
public class ApiProxyDemo {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /** Connect and socket timeout, in milliseconds, used for every request in this demo. */
    private static final int TIMEOUT_MS = 10000;

    /**
     * Issues a GET request and returns the response body decoded as UTF-8.
     *
     * @param url the URL to fetch
     * @return the response body
     * @throws RuntimeException if the response status is not 200
     * @throws Exception        if the request itself fails (connection error, timeout, ...)
     */
    static String httpGet(String url) throws Exception {
        // Without explicit timeouts the default client can block forever on a stalled API server.
        RequestConfig cfg = RequestConfig.custom()
                .setConnectTimeout(TIMEOUT_MS)
                .setSocketTimeout(TIMEOUT_MS)
                .build();
        try (CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(cfg).build()) {
            HttpGet get = new HttpGet(url);
            try (CloseableHttpResponse resp = client.execute(get)) {
                int code = resp.getStatusLine().getStatusCode();
                if (code != 200) {
                    throw new RuntimeException(describeApiError(code));
                }
                return EntityUtils.toString(resp.getEntity(), "UTF-8");
            }
        }
    }

    /**
     * Builds the error message for a non-200 API status code.
     *
     * @param code the HTTP status code returned by the API
     * @return a message naming the code, with a cause for 400, 403 and 429
     */
    private static String describeApiError(int code) {
        switch (code) {
            case 400:
                return "API错误 400: 参数错误";
            case 403:
                return "API错误 403: 主机IP不在白名单";
            case 429:
                return "API错误 429: 提取频率过快";
            default:
                return "API错误: " + code;
        }
    }

    /**
     * Requests {@code https://httpbin.org/ip} through the given HTTP proxy and prints the body.
     *
     * @param ip   proxy host
     * @param port proxy port
     * @throws RuntimeException if the target responds with a status other than 200
     * @throws Exception        if the request itself fails
     */
    static void visitWithProxy(String ip, int port) throws Exception {
        HttpHost proxy = new HttpHost(ip, port, "http");
        RequestConfig cfg = RequestConfig.custom()
                .setProxy(proxy)
                .setConnectTimeout(TIMEOUT_MS)
                .setSocketTimeout(TIMEOUT_MS)
                .build();

        try (CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(cfg).build()) {
            HttpGet get = new HttpGet("https://httpbin.org/ip");
            try (CloseableHttpResponse resp = client.execute(get)) {
                int code = resp.getStatusLine().getStatusCode();
                if (code != 200) {
                    throw new RuntimeException("访问失败: " + code);
                }
                System.out.println(EntityUtils.toString(resp.getEntity(), "UTF-8"));
            }
        }
    }

    /**
     * Entry point: fetches the proxy list, takes the first entry's {@code ip} and
     * {@code port} fields, and visits the target through that proxy.
     */
    public static void main(String[] args) throws Exception {
        String body = httpGet(apiUrl);
        JSONArray arr = new JSONArray(body);
        if (arr.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        JSONObject first = arr.getJSONObject(0);
        String ip = first.getString("ip");
        int port = first.getInt("port");
        visitWithProxy(ip, port);
    }
}

OkHttp

import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * OkHttp demo: calls the proxy-extraction API, parses the JSON array it returns, and
 * then requests a target page through the first proxy in that array.
 */
public class ApiProxyOkHttpDemo {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Entry point.
     *
     * @throws RuntimeException if either request is unsuccessful or the API returns an empty array
     * @throws Exception        if a request fails at the transport level
     */
    public static void main(String[] args) throws Exception {
        OkHttpClient client = new OkHttpClient.Builder().build();
        Request req = new Request.Builder().url(apiUrl).build();

        String ip;
        int port;
        // A Response holds a connection and must be closed; try-with-resources also
        // covers the early-throw paths below.
        try (Response r = client.newCall(req).execute()) {
            if (!r.isSuccessful()) {
                throw new RuntimeException("API错误: " + r.code());
            }
            JSONArray arr = new JSONArray(r.body().string());
            if (arr.length() == 0) {
                throw new RuntimeException("API 返回为空");
            }
            JSONObject first = arr.getJSONObject(0);
            ip = first.getString("ip");
            port = first.getInt("port");
        }

        java.net.Proxy proxy = new java.net.Proxy(java.net.Proxy.Type.HTTP, new java.net.InetSocketAddress(ip, port));
        OkHttpClient proxied = new OkHttpClient.Builder().proxy(proxy).build();
        Request rr = new Request.Builder().url("https://httpbin.org/ip").build();
        try (Response r2 = proxied.newCall(rr).execute()) {
            if (!r2.isSuccessful()) {
                throw new RuntimeException("访问失败:" + r2.code());
            }
            System.out.println(r2.body().string());
        }
    }
}

Jsoup

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Jsoup demo: calls the proxy-extraction API, parses the JSON array it returns, and
 * then requests a target page through the first proxy in that array.
 */
public class ApiProxyJsoupDemo {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Calls the extraction API and returns the first proxy entry.
     *
     * @return the first element of the JSON array returned by the API
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception        if the request fails at the transport level
     */
    static JSONObject fetchFirstProxy() throws Exception {
        Connection.Response res = Jsoup.connect(apiUrl)
                .ignoreContentType(true)
                // By default Jsoup throws HttpStatusException on error statuses, which
                // would bypass the status-code handling below.
                .ignoreHttpErrors(true)
                .timeout(10_000)
                .execute();
        int code = res.statusCode();
        if (code != 200) {
            switch (code) {
                case 400: throw new RuntimeException("API错误 400: 参数错误");
                case 403: throw new RuntimeException("API错误 403: 主机IP不在白名单");
                case 429: throw new RuntimeException("API错误 429: 提取频率过快");
                default: throw new RuntimeException("API错误: " + code);
            }
        }
        JSONArray arr = new JSONArray(res.body());
        if (arr.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        return arr.getJSONObject(0);
    }

    /**
     * Requests {@code https://httpbin.org/ip} through the given proxy and prints the body text.
     *
     * @param ip   proxy host
     * @param port proxy port
     * @throws Exception if the request fails
     */
    static void visitWithJsoup(String ip, int port) throws Exception {
        Document doc = Jsoup.connect("https://httpbin.org/ip")
                .proxy(ip, port)
                .userAgent("Mozilla/5.0")
                // The target answers with JSON; Jsoup rejects non-text/XML content types
                // unless told to ignore the Content-Type header.
                .ignoreContentType(true)
                .timeout(10_000)
                .get();
        System.out.println(doc.body().text());
    }

    /**
     * Entry point: fetches the first proxy and visits the target through it.
     */
    public static void main(String[] args) throws Exception {
        JSONObject first = fetchFirstProxy();
        String ip = first.getString("ip");
        int port = first.getInt("port");
        visitWithJsoup(ip, port);
    }
}

Selenium(Chrome 示例)

import org.json.JSONArray;
import org.json.JSONObject;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;

/**
 * Selenium (Chrome) demo: obtains a proxy from the extraction API and launches Chrome
 * with that proxy configured, then loads a target page and prints its source.
 */
public class ApiProxySeleniumDemo {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Calls the extraction API and returns the first proxy entry.
     *
     * @return the first element of the JSON array returned by the API
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception        if the request fails at the transport level
     */
    static JSONObject fetchFirstProxy() throws Exception {
        HttpClient http = HttpClient.newBuilder()
                .connectTimeout(Duration.ofSeconds(10))
                .build();
        HttpRequest request = HttpRequest.newBuilder(URI.create(apiUrl))
                .timeout(Duration.ofSeconds(10))
                .GET()
                .build();
        HttpResponse<String> reply = http.send(request, HttpResponse.BodyHandlers.ofString());

        int status = reply.statusCode();
        if (status != 200) {
            String reason;
            if (status == 400) {
                reason = "API错误 400: 参数错误";
            } else if (status == 403) {
                reason = "API错误 403: 主机IP不在白名单";
            } else if (status == 429) {
                reason = "API错误 429: 提取频率过快";
            } else {
                reason = "API错误: " + status;
            }
            throw new RuntimeException(reason);
        }

        JSONArray proxies = new JSONArray(reply.body());
        if (proxies.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        return proxies.getJSONObject(0);
    }

    /**
     * Entry point: fetches a proxy, starts Chrome with a {@code --proxy-server} argument
     * pointing at it, loads the target page, and always quits the driver afterwards.
     */
    public static void main(String[] args) throws Exception {
        JSONObject entry = fetchFirstProxy();
        String ip = entry.getString("ip");
        int port = entry.getInt("port");

        String proxyArgument = "--proxy-server=http://" + ip + ":" + port;
        ChromeOptions chromeOptions = new ChromeOptions();
        chromeOptions.addArguments(proxyArgument);

        WebDriver browser = new ChromeDriver(chromeOptions);
        try {
            browser.get("https://httpbin.org/ip");
            String source = browser.getPageSource();
            System.out.println(source);
        } finally {
            // Quit even when navigation fails so the browser process does not linger.
            browser.quit();
        }
    }
}

WebMagic

import org.json.JSONArray;
import org.json.JSONObject;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.SimpleProxyProvider;

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;

/**
 * WebMagic demo: obtains a proxy from the extraction API and runs a spider whose
 * downloader routes requests through that proxy. The processor prints each page's raw text.
 */
public class ApiProxyWebMagicDemo implements PageProcessor {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    private final Site site = Site.me().setRetryTimes(1).setTimeOut(10_000);

    /**
     * Calls the extraction API and returns the first proxy entry.
     *
     * @return the first element of the JSON array returned by the API
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception        if the request fails at the transport level
     */
    static JSONObject fetchFirstProxy() throws Exception {
        HttpClient apiClient = HttpClient.newBuilder()
                .connectTimeout(Duration.ofSeconds(10))
                .build();
        HttpRequest apiRequest = HttpRequest.newBuilder(URI.create(apiUrl))
                .timeout(Duration.ofSeconds(10))
                .GET()
                .build();
        HttpResponse<String> apiResponse = apiClient.send(apiRequest, HttpResponse.BodyHandlers.ofString());

        int status = apiResponse.statusCode();
        if (status != 200) {
            throw new RuntimeException(errorTextFor(status));
        }

        JSONArray entries = new JSONArray(apiResponse.body());
        if (entries.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        return entries.getJSONObject(0);
    }

    /** Returns the error message used for a non-200 API status code. */
    private static String errorTextFor(int status) {
        switch (status) {
            case 400:
                return "API错误 400: 参数错误";
            case 403:
                return "API错误 403: 主机IP不在白名单";
            case 429:
                return "API错误 429: 提取频率过快";
            default:
                return "API错误: " + status;
        }
    }

    /** Prints the raw text of the downloaded page. */
    @Override
    public void process(Page page) {
        String raw = page.getRawText();
        System.out.println(raw);
    }

    /** Returns the site settings (one retry, 10 000 timeout) configured for this processor. */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Entry point: fetches a proxy, installs it on an {@code HttpClientDownloader}, and
     * runs the spider against the target URL.
     */
    public static void main(String[] args) throws Exception {
        JSONObject entry = fetchFirstProxy();
        String ip = entry.getString("ip");
        int port = entry.getInt("port");

        Proxy upstream = new Proxy(ip, port);
        HttpClientDownloader proxiedDownloader = new HttpClientDownloader();
        proxiedDownloader.setProxyProvider(SimpleProxyProvider.from(upstream));

        Spider spider = Spider.create(new ApiProxyWebMagicDemo());
        spider.setDownloader(proxiedDownloader);
        spider.addUrl("https://httpbin.org/ip");
        spider.run();
    }
}

crawler4j

import org.json.JSONArray;
import org.json.JSONObject;

import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.WebURL;

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;

/**
 * crawler4j demo: obtains a proxy from the extraction API, configures the crawl to use
 * it, and crawls a single seed URL, printing the fetched content.
 */
public class ApiProxyCrawler4jDemo {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Calls the extraction API and returns the first proxy entry.
     *
     * @return the first element of the JSON array returned by the API
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception        if the request fails at the transport level
     */
    static JSONObject fetchFirstProxy() throws Exception {
        HttpClient http = HttpClient.newBuilder()
                .connectTimeout(Duration.ofSeconds(10))
                .build();
        HttpRequest lookup = HttpRequest.newBuilder(URI.create(apiUrl))
                .timeout(Duration.ofSeconds(10))
                .GET()
                .build();
        HttpResponse<String> answer = http.send(lookup, HttpResponse.BodyHandlers.ofString());

        int status = answer.statusCode();
        if (status == 200) {
            JSONArray list = new JSONArray(answer.body());
            if (list.length() == 0) {
                throw new RuntimeException("API 返回为空");
            }
            return list.getJSONObject(0);
        }

        // Any other status is an API failure; three codes carry a specific explanation.
        if (status == 400) {
            throw new RuntimeException("API错误 400: 参数错误");
        }
        if (status == 403) {
            throw new RuntimeException("API错误 403: 主机IP不在白名单");
        }
        if (status == 429) {
            throw new RuntimeException("API错误 429: 提取频率过快");
        }
        throw new RuntimeException("API错误: " + status);
    }

    /** Crawler that accepts every URL and prints each visited page's content as UTF-8 text. */
    public static class SinglePageCrawler extends WebCrawler {
        @Override
        public boolean shouldVisit(Page referringPage, WebURL url) {
            // Example only: no URL filtering is applied.
            return true;
        }

        @Override
        public void visit(Page page) {
            byte[] raw = page.getContentData();
            System.out.println(new String(raw, StandardCharsets.UTF_8));
        }
    }

    /**
     * Entry point: fetches a proxy, builds a crawl configuration that uses it, and starts
     * one crawler thread on the seed URL.
     */
    public static void main(String[] args) throws Exception {
        JSONObject entry = fetchFirstProxy();
        String ip = entry.getString("ip");
        int port = entry.getInt("port");

        CrawlConfig crawlConfig = new CrawlConfig();
        crawlConfig.setCrawlStorageFolder("./crawl_data");
        crawlConfig.setPolitenessDelay(200);
        crawlConfig.setMaxDepthOfCrawling(0);
        crawlConfig.setProxyHost(ip);
        crawlConfig.setProxyPort(port);

        PageFetcher fetcher = new PageFetcher(crawlConfig);
        RobotstxtServer robots = new RobotstxtServer(new RobotstxtConfig(), fetcher);
        CrawlController crawl = new CrawlController(crawlConfig, fetcher, robots);

        crawl.addSeed("https://httpbin.org/ip");
        crawl.start(SinglePageCrawler.class, 1);
    }
}