流程:调用 API → 解析 JSON → 处理错误 → 使用返回代理访问目标站。
> [!NOTE]
> 提示:下述示例均采用 IP 白名单模式(无需用户名密码认证)。示例代码使用 org.json 解析 JSON;生产环境请选用合适的 JSON 解析库,例如 Jackson 或 Gson。
HttpClient 4.x
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONObject;
/**
 * Apache HttpClient 4.x example: fetches a proxy list from the extraction API, parses the
 * JSON array, and requests a target site through the first returned proxy.
 */
public class ApiProxyDemo {
    /** Extraction API endpoint; the {@code <...>} segments are placeholders to fill in. */
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Issues a GET request and returns the response body decoded as UTF-8.
     *
     * @param url the URL to fetch
     * @return the response body as text
     * @throws RuntimeException if the response status is not 200
     * @throws Exception if the request fails at the transport level
     */
    static String httpGet(String url) throws Exception {
        // Bound the API call the same way visitWithProxy bounds the proxied call. Without an
        // explicit RequestConfig the client falls back to its default timeouts, which may
        // leave the caller blocked when the API endpoint is unresponsive.
        RequestConfig cfg = RequestConfig.custom()
                .setConnectTimeout(10000)
                .setSocketTimeout(10000)
                .build();
        try (CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(cfg).build()) {
            HttpGet get = new HttpGet(url);
            try (CloseableHttpResponse resp = client.execute(get)) {
                int code = resp.getStatusLine().getStatusCode();
                if (code != 200) {
                    throw new RuntimeException("API错误: " + code);
                }
                return EntityUtils.toString(resp.getEntity(), "UTF-8");
            }
        }
    }

    /**
     * Requests {@code https://httpbin.org/ip} through the given HTTP proxy and prints the body.
     *
     * @param ip proxy host
     * @param port proxy port
     * @throws RuntimeException if the response status is not 200
     * @throws Exception if the request fails at the transport level
     */
    static void visitWithProxy(String ip, int port) throws Exception {
        HttpHost proxy = new HttpHost(ip, port, "http");
        RequestConfig cfg = RequestConfig.custom()
                .setProxy(proxy)
                .setConnectTimeout(10000)
                .setSocketTimeout(10000)
                .build();
        try (CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(cfg).build()) {
            HttpGet get = new HttpGet("https://httpbin.org/ip");
            try (CloseableHttpResponse resp = client.execute(get)) {
                int code = resp.getStatusLine().getStatusCode();
                if (code != 200) {
                    throw new RuntimeException("访问失败: " + code);
                }
                System.out.println(EntityUtils.toString(resp.getEntity(), "UTF-8"));
            }
        }
    }

    /**
     * Fetches the proxy list, takes the first entry's {@code ip} and {@code port}, and visits
     * the target site through it.
     *
     * @throws RuntimeException if the API returns an empty array
     */
    public static void main(String[] args) throws Exception {
        String body = httpGet(apiUrl);
        JSONArray arr = new JSONArray(body);
        if (arr.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        JSONObject first = arr.getJSONObject(0);
        String ip = first.getString("ip");
        int port = first.getInt("port");
        visitWithProxy(ip, port);
    }
}
OkHttp
import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;
/**
 * OkHttp example: fetches a proxy list from the extraction API, parses the JSON array, and
 * requests a target site through the first returned proxy.
 */
public class ApiProxyOkHttpDemo {
    /** Extraction API endpoint; the {@code <...>} segments are placeholders to fill in. */
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Runs the full flow: call the API, read the first proxy's {@code ip}/{@code port}, then
     * fetch {@code https://httpbin.org/ip} through that proxy and print the body.
     *
     * @throws RuntimeException if either request is unsuccessful or the API returns an empty array
     * @throws Exception if a request fails at the transport level
     */
    public static void main(String[] args) throws Exception {
        OkHttpClient client = new OkHttpClient.Builder().build();
        Request req = new Request.Builder().url(apiUrl).build();

        String ip;
        int port;
        // Response holds the body and its connection; try-with-resources releases them on
        // every path, including the early throws below.
        try (Response r = client.newCall(req).execute()) {
            if (!r.isSuccessful()) {
                throw new RuntimeException("API错误: " + r.code());
            }
            JSONArray arr = new JSONArray(r.body().string());
            if (arr.length() == 0) {
                throw new RuntimeException("API 返回为空");
            }
            JSONObject first = arr.getJSONObject(0);
            ip = first.getString("ip");
            port = first.getInt("port");
        }

        java.net.Proxy proxy = new java.net.Proxy(java.net.Proxy.Type.HTTP, new java.net.InetSocketAddress(ip, port));
        OkHttpClient proxied = new OkHttpClient.Builder().proxy(proxy).build();
        Request rr = new Request.Builder().url("https://httpbin.org/ip").build();
        try (Response r2 = proxied.newCall(rr).execute()) {
            if (!r2.isSuccessful()) {
                throw new RuntimeException("访问失败:" + r2.code());
            }
            System.out.println(r2.body().string());
        }
    }
}
Jsoup
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.json.JSONArray;
import org.json.JSONObject;
/**
 * Jsoup example: fetches a proxy list from the extraction API, parses the JSON array, and
 * requests a target site through the first returned proxy.
 */
public class ApiProxyJsoupDemo {
    /** Extraction API endpoint; the {@code <...>} segments are placeholders to fill in. */
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Calls the extraction API and returns the first proxy entry of the JSON array.
     *
     * @return the first element of the returned array
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception if the request fails at the transport level
     */
    static JSONObject fetchFirstProxy() throws Exception {
        Connection.Response res = Jsoup.connect(apiUrl)
                .ignoreContentType(true)
                // Without this, execute() throws on non-2xx responses before the status
                // mapping below can run, so the 400/403/429 messages would never be produced.
                .ignoreHttpErrors(true)
                .timeout(10_000)
                .execute();
        int code = res.statusCode();
        if (code != 200) {
            switch (code) {
                case 400: throw new RuntimeException("API错误 400: 参数错误");
                case 403: throw new RuntimeException("API错误 403: 主机IP不在白名单");
                case 429: throw new RuntimeException("API错误 429: 提取频率过快");
                default: throw new RuntimeException("API错误: " + code);
            }
        }
        JSONArray arr = new JSONArray(res.body());
        if (arr.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        return arr.getJSONObject(0);
    }

    /**
     * Requests {@code https://httpbin.org/ip} through the given proxy and prints the body text.
     *
     * @param ip proxy host
     * @param port proxy port
     * @throws Exception if the request fails
     */
    static void visitWithJsoup(String ip, int port) throws Exception {
        Document doc = Jsoup.connect("https://httpbin.org/ip")
                .proxy(ip, port)
                .userAgent("Mozilla/5.0")
                // The target replies with JSON rather than HTML/XML; Jsoup rejects such
                // content types unless told to ignore them.
                .ignoreContentType(true)
                .timeout(10_000)
                .get();
        System.out.println(doc.body().text());
    }

    /** Fetches the first proxy from the API and visits the target site through it. */
    public static void main(String[] args) throws Exception {
        JSONObject first = fetchFirstProxy();
        String ip = first.getString("ip");
        int port = first.getInt("port");
        visitWithJsoup(ip, port);
    }
}
Selenium(Chrome 示例)
import org.json.JSONArray;
import org.json.JSONObject;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
/**
 * Selenium (Chrome) example: obtains a proxy from the extraction API with the JDK HTTP client
 * and launches Chrome configured to use that proxy.
 */
public class ApiProxySeleniumDemo {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Calls the extraction API and returns the first proxy entry of the JSON array.
     *
     * @return the first element of the returned array
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception if sending the request fails
     */
    static JSONObject fetchFirstProxy() throws Exception {
        Duration limit = Duration.ofSeconds(10);
        HttpClient http = HttpClient.newBuilder()
                .connectTimeout(limit)
                .build();
        HttpRequest apiRequest = HttpRequest.newBuilder(URI.create(apiUrl))
                .timeout(Duration.ofSeconds(10))
                .GET()
                .build();
        HttpResponse<String> apiResponse = http.send(apiRequest, HttpResponse.BodyHandlers.ofString());

        // Map the known API failure codes to descriptive messages; anything else that is
        // not 200 gets the generic message.
        int status = apiResponse.statusCode();
        if (status == 400) {
            throw new RuntimeException("API错误 400: 参数错误");
        }
        if (status == 403) {
            throw new RuntimeException("API错误 403: 主机IP不在白名单");
        }
        if (status == 429) {
            throw new RuntimeException("API错误 429: 提取频率过快");
        }
        if (status != 200) {
            throw new RuntimeException("API错误: " + status);
        }

        JSONArray proxies = new JSONArray(apiResponse.body());
        if (proxies.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        return proxies.getJSONObject(0);
    }

    /**
     * Fetches a proxy, starts Chrome with a matching {@code --proxy-server} argument, loads
     * {@code https://httpbin.org/ip}, prints the page source, and always quits the driver.
     */
    public static void main(String[] args) throws Exception {
        JSONObject entry = fetchFirstProxy();
        String host = entry.getString("ip");
        int portNumber = entry.getInt("port");

        ChromeOptions chromeOptions = new ChromeOptions();
        String proxyArgument = "--proxy-server=http://" + host + ":" + portNumber;
        chromeOptions.addArguments(proxyArgument);

        WebDriver browser = new ChromeDriver(chromeOptions);
        try {
            browser.get("https://httpbin.org/ip");
            String pageSource = browser.getPageSource();
            System.out.println(pageSource);
        } finally {
            browser.quit();
        }
    }
}
WebMagic
import org.json.JSONArray;
import org.json.JSONObject;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.SimpleProxyProvider;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
/**
 * WebMagic example: obtains a proxy from the extraction API with the JDK HTTP client and runs
 * a spider whose downloader routes requests through that proxy.
 */
public class ApiProxyWebMagicDemo implements PageProcessor {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /** Site settings handed to the spider: one retry and a 10_000 timeout value. */
    private final Site site = Site.me().setRetryTimes(1).setTimeOut(10_000);

    /** Builds the exception message used for a non-200 API status. */
    private static String apiErrorMessage(int status) {
        switch (status) {
            case 400:
                return "API错误 400: 参数错误";
            case 403:
                return "API错误 403: 主机IP不在白名单";
            case 429:
                return "API错误 429: 提取频率过快";
            default:
                return "API错误: " + status;
        }
    }

    /**
     * Calls the extraction API and returns the first proxy entry of the JSON array.
     *
     * @return the first element of the returned array
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception if sending the request fails
     */
    static JSONObject fetchFirstProxy() throws Exception {
        HttpClient http = HttpClient.newBuilder()
                .connectTimeout(Duration.ofSeconds(10))
                .build();
        HttpRequest apiRequest = HttpRequest.newBuilder(URI.create(apiUrl))
                .timeout(Duration.ofSeconds(10))
                .GET()
                .build();
        HttpResponse<String> apiResponse = http.send(apiRequest, HttpResponse.BodyHandlers.ofString());

        int status = apiResponse.statusCode();
        if (status != 200) {
            throw new RuntimeException(apiErrorMessage(status));
        }

        JSONArray proxies = new JSONArray(apiResponse.body());
        if (proxies.length() == 0) {
            throw new RuntimeException("API 返回为空");
        }
        return proxies.getJSONObject(0);
    }

    /** Prints the raw text of each downloaded page. */
    @Override
    public void process(Page page) {
        String raw = page.getRawText();
        System.out.println(raw);
    }

    /** Returns the site settings configured for this processor. */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Fetches a proxy, installs it on an {@code HttpClientDownloader}, and runs the spider
     * against {@code https://httpbin.org/ip}.
     */
    public static void main(String[] args) throws Exception {
        JSONObject entry = fetchFirstProxy();
        String host = entry.getString("ip");
        int portNumber = entry.getInt("port");

        HttpClientDownloader proxiedDownloader = new HttpClientDownloader();
        Proxy upstream = new Proxy(host, portNumber);
        proxiedDownloader.setProxyProvider(SimpleProxyProvider.from(upstream));

        Spider spider = Spider.create(new ApiProxyWebMagicDemo())
                .setDownloader(proxiedDownloader)
                .addUrl("https://httpbin.org/ip");
        spider.run();
    }
}
crawler4j
import org.json.JSONArray;
import org.json.JSONObject;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.WebURL;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
/**
 * crawler4j example: obtains a proxy from the extraction API with the JDK HTTP client and
 * starts a single-crawler run whose fetcher is configured with that proxy.
 */
public class ApiProxyCrawler4jDemo {
    static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";

    /**
     * Calls the extraction API and returns the first proxy entry of the JSON array.
     *
     * @return the first element of the returned array
     * @throws RuntimeException if the status is not 200 or the array is empty
     * @throws Exception if sending the request fails
     */
    static JSONObject fetchFirstProxy() throws Exception {
        HttpClient http = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
        HttpRequest apiRequest =
                HttpRequest.newBuilder(URI.create(apiUrl)).timeout(Duration.ofSeconds(10)).GET().build();
        HttpResponse<String> apiResponse = http.send(apiRequest, HttpResponse.BodyHandlers.ofString());

        int status = apiResponse.statusCode();
        if (status == 200) {
            JSONArray proxies = new JSONArray(apiResponse.body());
            if (proxies.length() == 0) {
                throw new RuntimeException("API 返回为空");
            }
            return proxies.getJSONObject(0);
        }

        // Every non-200 status ends in an exception; the known codes get specific messages.
        switch (status) {
            case 400:
                throw new RuntimeException("API错误 400: 参数错误");
            case 403:
                throw new RuntimeException("API错误 403: 主机IP不在白名单");
            case 429:
                throw new RuntimeException("API错误 429: 提取频率过快");
            default:
                throw new RuntimeException("API错误: " + status);
        }
    }

    /** Crawler that accepts every URL and prints each visited page's content as UTF-8 text. */
    public static class SinglePageCrawler extends WebCrawler {
        @Override
        public boolean shouldVisit(Page referringPage, WebURL url) {
            // Example only: no URL filtering is applied.
            return true;
        }

        @Override
        public void visit(Page page) {
            System.out.println(new String(page.getContentData(), StandardCharsets.UTF_8));
        }
    }

    /** Builds the crawl configuration with the given proxy host and port. */
    private static CrawlConfig proxiedConfig(String host, int portNumber) {
        CrawlConfig crawlConfig = new CrawlConfig();
        crawlConfig.setCrawlStorageFolder("./crawl_data");
        crawlConfig.setPolitenessDelay(200);
        crawlConfig.setMaxDepthOfCrawling(0);
        crawlConfig.setProxyHost(host);
        crawlConfig.setProxyPort(portNumber);
        return crawlConfig;
    }

    /**
     * Fetches a proxy, wires up the crawler4j controller with it, seeds
     * {@code https://httpbin.org/ip}, and starts one {@link SinglePageCrawler}.
     */
    public static void main(String[] args) throws Exception {
        JSONObject entry = fetchFirstProxy();
        String host = entry.getString("ip");
        int portNumber = entry.getInt("port");

        CrawlConfig crawlConfig = proxiedConfig(host, portNumber);
        PageFetcher fetcher = new PageFetcher(crawlConfig);
        RobotstxtServer robots = new RobotstxtServer(new RobotstxtConfig(), fetcher);

        CrawlController crawlController = new CrawlController(crawlConfig, fetcher, robots);
        crawlController.addSeed("https://httpbin.org/ip");
        crawlController.start(SinglePageCrawler.class, 1);
    }
}