Java API代理示例

阅读模式

流程:调用 API → 解析 JSON → 处理错误 → 使用返回代理访问目标站。

提示

下述示例均采用 IP 白名单模式(无需用户名密码认证)。示例代码使用 org.json 解析 API 返回的 JSON 数组;生产环境可按需换用 Jackson/Gson 等 JSON 解析库。

HttpClient 4.x

1
import org.apache.http.HttpHost;
2
import org.apache.http.client.config.RequestConfig;
3
import org.apache.http.client.methods.CloseableHttpResponse;
4
import org.apache.http.client.methods.HttpGet;
5
import org.apache.http.impl.client.CloseableHttpClient;
6
import org.apache.http.impl.client.HttpClients;
7
import org.apache.http.util.EntityUtils;
8
import org.json.JSONArray;
9
import org.json.JSONObject;
10
11
public class ApiProxyDemo {
12
static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";
13
14
static String httpGet(String url) throws Exception {
15
try (CloseableHttpClient client = HttpClients.createDefault()) {
16
HttpGet get = new HttpGet(url);
17
try (CloseableHttpResponse resp = client.execute(get)) {
18
int code = resp.getStatusLine().getStatusCode();
19
if (code != 200) {
20
throw new RuntimeException("API错误: " + code);
21
}
22
return EntityUtils.toString(resp.getEntity(), "UTF-8");
23
}
24
}
25
}
26
27
static void visitWithProxy(String ip, int port) throws Exception {
28
HttpHost proxy = new HttpHost(ip, port, "http");
29
RequestConfig cfg = RequestConfig.custom().setProxy(proxy).setConnectTimeout(10000).setSocketTimeout(10000).build();
30
31
try (CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(cfg).build()) {
32
HttpGet get = new HttpGet("https://httpbin.org/ip");
33
try (CloseableHttpResponse resp = client.execute(get)) {
34
int code = resp.getStatusLine().getStatusCode();
35
if (code != 200) throw new RuntimeException("访问失败: " + code);
36
System.out.println(EntityUtils.toString(resp.getEntity(), "UTF-8"));
37
}
38
}
39
}
40
41
public static void main(String[] args) throws Exception {
42
String body = httpGet(apiUrl);
43
JSONArray arr = new JSONArray(body);
44
if (arr.length() == 0) throw new RuntimeException("API 返回为空");
45
JSONObject first = arr.getJSONObject(0);
46
String ip = first.getString("ip");
47
int port = first.getInt("port");
48
visitWithProxy(ip, port);
49
}
50
}

OkHttp

1
import okhttp3.*;
2
import org.json.JSONArray;
3
import org.json.JSONObject;
4
5
public class ApiProxyOkHttpDemo {
6
static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";
7
8
public static void main(String[] args) throws Exception {
9
OkHttpClient client = new OkHttpClient.Builder().build();
10
Request req = new Request.Builder().url(apiUrl).build();
11
Response r = client.newCall(req).execute();
12
if (!r.isSuccessful()) throw new RuntimeException("API错误: " + r.code());
13
JSONArray arr = new JSONArray(r.body().string());
14
if (arr.length() == 0) throw new RuntimeException("API 返回为空");
15
JSONObject first = arr.getJSONObject(0);
16
String ip = first.getString("ip");
17
int port = first.getInt("port");
18
19
java.net.Proxy proxy = new java.net.Proxy(java.net.Proxy.Type.HTTP, new java.net.InetSocketAddress(ip, port));
20
OkHttpClient proxied = new OkHttpClient.Builder().proxy(proxy).build();
21
Request rr = new Request.Builder().url("https://httpbin.org/ip").build();
22
Response r2 = proxied.newCall(rr).execute();
23
if (!r2.isSuccessful()) throw new RuntimeException("访问失败:" + r2.code());
24
System.out.println(r2.body().string());
25
}
26
}

Jsoup

1
import org.jsoup.Connection;
2
import org.jsoup.Jsoup;
3
import org.jsoup.nodes.Document;
4
import org.json.JSONArray;
5
import org.json.JSONObject;
6
7
public class ApiProxyJsoupDemo {
8
static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";
9
10
static JSONObject fetchFirstProxy() throws Exception {
11
Connection.Response res = Jsoup.connect(apiUrl)
12
.ignoreContentType(true)
13
.timeout(10_000)
14
.execute();
15
int code = res.statusCode();
16
if (code != 200) {
17
switch (code) {
18
case 400: throw new RuntimeException("API错误 400: 参数错误");
19
case 403: throw new RuntimeException("API错误 403: 主机IP不在白名单");
20
case 429: throw new RuntimeException("API错误 429: 提取频率过快");
21
default: throw new RuntimeException("API错误: " + code);
22
}
23
}
24
JSONArray arr = new JSONArray(res.body());
25
if (arr.length() == 0) throw new RuntimeException("API 返回为空");
26
return arr.getJSONObject(0);
27
}
28
29
static void visitWithJsoup(String ip, int port) throws Exception {
30
Document doc = Jsoup.connect("https://httpbin.org/ip")
31
.proxy(ip, port)
32
.userAgent("Mozilla/5.0")
33
.timeout(10_000)
34
.get();
35
System.out.println(doc.body().text());
36
}
37
38
public static void main(String[] args) throws Exception {
39
JSONObject first = fetchFirstProxy();
40
String ip = first.getString("ip");
41
int port = first.getInt("port");
42
visitWithJsoup(ip, port);
43
}
44
}

Selenium(Chrome 示例)

1
import org.json.JSONArray;
2
import org.json.JSONObject;
3
import org.openqa.selenium.WebDriver;
4
import org.openqa.selenium.chrome.ChromeDriver;
5
import org.openqa.selenium.chrome.ChromeOptions;
6
7
import java.net.URI;
8
import java.net.http.HttpClient;
9
import java.net.http.HttpRequest;
10
import java.net.http.HttpResponse;
11
import java.time.Duration;
12
13
public class ApiProxySeleniumDemo {
14
static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";
15
16
static JSONObject fetchFirstProxy() throws Exception {
17
HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
18
HttpRequest req = HttpRequest.newBuilder(URI.create(apiUrl)).timeout(Duration.ofSeconds(10)).GET().build();
19
HttpResponse<String> resp = client.send(req, HttpResponse.BodyHandlers.ofString());
20
int code = resp.statusCode();
21
if (code != 200) {
22
switch (code) {
23
case 400: throw new RuntimeException("API错误 400: 参数错误");
24
case 403: throw new RuntimeException("API错误 403: 主机IP不在白名单");
25
case 429: throw new RuntimeException("API错误 429: 提取频率过快");
26
default: throw new RuntimeException("API错误: " + code);
27
}
28
}
29
JSONArray arr = new JSONArray(resp.body());
30
if (arr.length() == 0) throw new RuntimeException("API 返回为空");
31
return arr.getJSONObject(0);
32
}
33
34
public static void main(String[] args) throws Exception {
35
JSONObject first = fetchFirstProxy();
36
String ip = first.getString("ip");
37
int port = first.getInt("port");
38
39
ChromeOptions options = new ChromeOptions();
40
options.addArguments("--proxy-server=http://" + ip + ":" + port);
41
WebDriver driver = new ChromeDriver(options);
42
try {
43
driver.get("https://httpbin.org/ip");
44
System.out.println(driver.getPageSource());
45
} finally {
46
driver.quit();
47
}
48
}
49
}

WebMagic

1
import org.json.JSONArray;
2
import org.json.JSONObject;
3
import us.codecraft.webmagic.Page;
4
import us.codecraft.webmagic.Site;
5
import us.codecraft.webmagic.Spider;
6
import us.codecraft.webmagic.processor.PageProcessor;
7
import us.codecraft.webmagic.downloader.HttpClientDownloader;
8
import us.codecraft.webmagic.proxy.Proxy;
9
import us.codecraft.webmagic.proxy.SimpleProxyProvider;
10
11
import java.net.URI;
12
import java.net.http.HttpClient;
13
import java.net.http.HttpRequest;
14
import java.net.http.HttpResponse;
15
import java.time.Duration;
16
17
public class ApiProxyWebMagicDemo implements PageProcessor {
18
static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";
19
20
private final Site site = Site.me().setRetryTimes(1).setTimeOut(10_000);
21
22
static JSONObject fetchFirstProxy() throws Exception {
23
HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
24
HttpRequest req = HttpRequest.newBuilder(URI.create(apiUrl)).timeout(Duration.ofSeconds(10)).GET().build();
25
HttpResponse<String> resp = client.send(req, HttpResponse.BodyHandlers.ofString());
26
int code = resp.statusCode();
27
if (code != 200) {
28
switch (code) {
29
case 400: throw new RuntimeException("API错误 400: 参数错误");
30
case 403: throw new RuntimeException("API错误 403: 主机IP不在白名单");
31
case 429: throw new RuntimeException("API错误 429: 提取频率过快");
32
default: throw new RuntimeException("API错误: " + code);
33
}
34
}
35
JSONArray arr = new JSONArray(resp.body());
36
if (arr.length() == 0) throw new RuntimeException("API 返回为空");
37
return arr.getJSONObject(0);
38
}
39
40
@Override
41
public void process(Page page) {
42
System.out.println(page.getRawText());
43
}
44
45
@Override
46
public Site getSite() {
47
return site;
48
}
49
50
public static void main(String[] args) throws Exception {
51
JSONObject first = fetchFirstProxy();
52
String ip = first.getString("ip");
53
int port = first.getInt("port");
54
55
HttpClientDownloader downloader = new HttpClientDownloader();
56
downloader.setProxyProvider(SimpleProxyProvider.from(new Proxy(ip, port)));
57
58
Spider.create(new ApiProxyWebMagicDemo())
59
.setDownloader(downloader)
60
.addUrl("https://httpbin.org/ip")
61
.run();
62
}
63
}

crawler4j

1
import org.json.JSONArray;
2
import org.json.JSONObject;
3
4
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
5
import edu.uci.ics.crawler4j.crawler.CrawlController;
6
import edu.uci.ics.crawler4j.crawler.Page;
7
import edu.uci.ics.crawler4j.crawler.WebCrawler;
8
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
9
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
10
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
11
import edu.uci.ics.crawler4j.url.WebURL;
12
13
import java.net.URI;
14
import java.net.http.HttpClient;
15
import java.net.http.HttpRequest;
16
import java.net.http.HttpResponse;
17
import java.nio.charset.StandardCharsets;
18
import java.time.Duration;
19
20
public class ApiProxyCrawler4jDemo {
21
static String apiUrl = "http://ip.16yun.cn:817/myip/pl/<ORDER_ID>/?s=<ORDER_SIGN>&u=<USER>&format=json";
22
23
static JSONObject fetchFirstProxy() throws Exception {
24
HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
25
HttpRequest req = HttpRequest.newBuilder(URI.create(apiUrl)).timeout(Duration.ofSeconds(10)).GET().build();
26
HttpResponse<String> resp = client.send(req, HttpResponse.BodyHandlers.ofString());
27
int code = resp.statusCode();
28
if (code != 200) {
29
switch (code) {
30
case 400: throw new RuntimeException("API错误 400: 参数错误");
31
case 403: throw new RuntimeException("API错误 403: 主机IP不在白名单");
32
case 429: throw new RuntimeException("API错误 429: 提取频率过快");
33
default: throw new RuntimeException("API错误: " + code);
34
}
35
}
36
JSONArray arr = new JSONArray(resp.body());
37
if (arr.length() == 0) throw new RuntimeException("API 返回为空");
38
return arr.getJSONObject(0);
39
}
40
41
public static class SinglePageCrawler extends WebCrawler {
42
@Override
43
public boolean shouldVisit(Page referringPage, WebURL url) {
44
return true; // 仅示例
45
}
46
47
@Override
48
public void visit(Page page) {
49
String content = new String(page.getContentData(), StandardCharsets.UTF_8);
50
System.out.println(content);
51
}
52
}
53
54
public static void main(String[] args) throws Exception {
55
JSONObject first = fetchFirstProxy();
56
String ip = first.getString("ip");
57
int port = first.getInt("port");
58
59
CrawlConfig config = new CrawlConfig();
60
config.setCrawlStorageFolder("./crawl_data");
61
config.setPolitenessDelay(200);
62
config.setMaxDepthOfCrawling(0);
63
config.setProxyHost(ip);
64
config.setProxyPort(port);
65
66
PageFetcher pageFetcher = new PageFetcher(config);
67
RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
68
RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
69
CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
70
71
controller.addSeed("https://httpbin.org/ip");
72
controller.start(SinglePageCrawler.class, 1);
73
}
74
}