JavaScript/Node.js 爬虫代理示例
代码示例说明
- 代码样例不能直接运行,请替换成您自己的代理信息。
- 在不同编程语言的代码示例中,需注意其环境版本。
- 使用示例代码时如遇到问题,请联系我们,我们会为您提供技术支持。
Node.js
// Node.js built-in `http` example: send one GET request to the target page
// through the HTTP proxy and stream the response body to stdout.
// Replace the proxy host/port and credentials with your own before running.
const http = require("http");
const { URL } = require("url");

// Target page to fetch
const targetUrl = "http://httpbin.org/ip";

// Parsed with the WHATWG URL class; the legacy url.parse() is deprecated.
const urlParsed = new URL(targetUrl);

// Proxy server (product site: www.16yun.cn)
const proxyHost = "t.16yun.cn";
const proxyPort = "36600";

// Random proxy tunnel id: an integer in [1, 10000].
// Math.random() gives a different value on every run; a generator with a
// fixed seed would produce the same "random" tunnel each time.
// NOTE(review): the accepted range/format of Proxy-Tunnel is defined by the
// proxy provider - confirm against their documentation.
const tunnel = Math.floor(Math.random() * 10000) + 1;

// Proxy credentials
const proxyUser = "username";
const proxyPass = "password";

// Buffer.from() is a factory function and must be called without `new`.
const base64 = Buffer.from(`${proxyUser}:${proxyPass}`).toString("base64");

const options = {
  // The TCP connection goes to the proxy ...
  host: proxyHost,
  port: proxyPort,
  // ... and the request line carries the absolute target URL.
  path: targetUrl,
  method: "GET",
  headers: {
    "Host": urlParsed.hostname,
    "Proxy-Tunnel": String(tunnel),
    "Proxy-Authorization": "Basic " + base64,
  },
};

http
  .request(options, (res) => {
    console.log("got response: " + res.statusCode);
    res.pipe(process.stdout);
  })
  .on("error", (err) => {
    console.log(err);
  })
  .end();
request
// `request` library example: fetch the target page through the HTTP proxy.
// Replace the proxy host/port and credentials with your own before running.
const request = require("request");

// Target page to fetch
const targetUrl = "http://httpbin.org/ip";

// Proxy server (product site: www.16yun.cn)
const proxyHost = "t.16yun.cn";
const proxyPort = "31111";

// Proxy credentials
const proxyUser = "username";
const proxyPass = "password";

// Credentials are embedded in the proxy URL. Values containing URL-reserved
// characters such as "@" or ":" need to be percent-encoded first.
const proxyUrl = `http://${proxyUser}:${proxyPass}@${proxyHost}:${proxyPort}`;

// Every request made through `proxiedRequest` uses the proxy by default.
const proxiedRequest = request.defaults({ proxy: proxyUrl });

const options = {
  url: targetUrl,
  headers: {},
};

proxiedRequest
  .get(options, (err, res) => {
    if (err) {
      // `res` is undefined when the request fails, so it must not be read.
      // The "error" listener below reports the failure.
      return;
    }
    console.log("got response: " + res.statusCode);
  })
  .on("error", (err) => {
    console.log(err);
  });
superagent
// `superagent` example: fetch the target page through the HTTP proxy.
// Replace the proxy host/port and credentials with your own before running.
const request = require("superagent");

// superagent-proxy patches superagent so requests gain a `.proxy()` method.
require("superagent-proxy")(request);

// Target page to fetch
const targetUrl = "http://httpbin.org/ip";

// Proxy server (product site: www.16yun.cn)
const proxyHost = "t.16yun.cn";
const proxyPort = 31111;

// Proxy credentials
const proxyUser = "username";
const proxyPass = "password";

// Proxy URL with the credentials embedded in the userinfo part.
const proxyUrl = `http://${proxyUser}:${proxyPass}@${proxyHost}:${proxyPort}`;

// Log the error on failure; otherwise print status, headers and body text.
const onResponse = (err, res) => {
  if (err) {
    return console.log(err);
  }

  console.log(res.status, res.headers);
  console.log(res.text);
};

request.get(targetUrl).proxy(proxyUrl).end(onResponse);
axios
// `axios` example: fetch the target page through the HTTP proxy.
// Replace the proxy host/port and credentials with your own before running.
const axios = require('axios');

// Target pages. Only the plain-HTTP one is requested below; see the note at
// the end of this script about HTTPS targets.
const targetUrl = "http://httpbin.org/ip";
const targetHttpsUrl = "https://httpbin.org/ip";

// Proxy server (product site: www.16yun.cn)
const proxyHost = "t.16yun.cn";
const proxyPort = 31111;

// Proxy credentials
const proxyUser = "username";
const proxyPass = "password";

// Proxy settings object passed to axios as the `proxy` request option.
const proxy = {
  host: proxyHost,
  port: proxyPort,
  auth: {
    username: proxyUser,
    password: proxyPass,
  },
};

axios
  .get(targetUrl, { proxy })
  .then((response) => {
    // Success: print the response payload.
    console.log(response.data);
  })
  .catch((error) => {
    // Failure: print the error.
    console.log(error);
  })
  .finally(() => {
    // Always executed.
  });

// HTTPS targets: the axios library's proxy support has a bug, so using it for
// them is not recommended.
// For details see https://github.com/axios/axios/issues/4531
浏览器与自动化(JS)
PhantomJS
以参数方式传递代理信息,示例如下:
# Run http-demo.js through the proxy; replace USERNAME:PASSWORD with your own proxy credentials.
phantomjs --proxy-auth=USERNAME:PASSWORD --proxy=http://t.16yun.cn:31111 --ignore-ssl-errors=true http-demo.js
http-demo.js 内容如下:
// PhantomJS script (http-demo.js): open the target page, print the headers of
// every received resource, then print the page content and exit.
// Proxy settings are supplied on the phantomjs command line, not here.
// Only ES5 syntax is used because PhantomJS's JavaScript engine predates ES2015.
var page = require('webpage').create();
page.settings.userAgent = 'Mozilla/5.0 UCBrowser/9.4.1.362 U3/0.8.0 Mobile Safari/533.1';

console.log('The user agent is ' + page.settings.userAgent);

// Random proxy tunnel id: an integer in [1, 10000].
// Math.random() gives a different value on every run; a generator with a
// fixed seed would produce the same "random" tunnel each time.
// NOTE(review): the accepted range/format of the tunnel value is defined by
// the proxy provider - confirm against their documentation.
var tunnel = Math.floor(Math.random() * 10000) + 1;

// Optional: uncomment to send the tunnel id as a custom request header.
//page.customHeaders = {
//    "proxy-tunnel": String(tunnel)
//};

// Print each header of every received resource as "name: value".
page.onResourceReceived = function (response) {
    var headers = response.headers;
    for (var i = 0; i < headers.length; ++i) {
        console.log(headers[i].name + ': ' + headers[i].value);
    }
};

page.open("http://httpbin.org/ip", {}, function (status) {
    console.log('status> ' + status);
    console.log(page.content);
    // Exit 3 seconds after the page callback fires.
    setTimeout(function () {
        phantom.exit();
    }, 3000);
});
CasperJS
以参数方式传递代理信息,示例如下:
# Run http-demo.js through the proxy; replace USERNAME:PASSWORD with your own proxy credentials.
casperjs --proxy-auth=USERNAME:PASSWORD --proxy=http://t.16yun.cn:31111 --ignore-ssl-errors=true --ssl-protocol=any http-demo.js
http-demo.js 内容如下:
// CasperJS script (http-demo.js): request the target page with custom headers
// (including the proxy tunnel id) and print the page content.
// Proxy settings are supplied on the casperjs command line, not here.
// Only ES5 syntax is used because the underlying PhantomJS engine predates ES2015.
var casper = require('casper').create();

// Random proxy tunnel id: an integer in [1, 10000].
// Math.random() gives a different value on every run; a generator with a
// fixed seed would produce the same "random" tunnel each time.
// NOTE(review): the accepted range/format of Proxy-Tunnel is defined by the
// proxy provider - confirm against their documentation.
var tunnel = Math.floor(Math.random() * 10000) + 1;

// Install the custom request headers once Casper has started.
casper.on('started', function () {
    this.page.customHeaders = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        // Header values are sent as text, so the id is converted explicitly.
        "Proxy-Tunnel": String(tunnel)
    };
});

casper.start("http://httpbin.org/headers");

casper.then(function () {
    console.log('First Page: ' + this.page.content);
});

casper.run();
playwright
// Playwright example: launch Chromium behind the authenticated proxy, log
// every request/response, and load the target page.
// `webkit` and `firefox` are imported but not used by this script.
const { chromium, webkit, firefox } = require('playwright');

(async () => {
  const browser = await chromium.launch({
    // Replace the server address and credentials with your own proxy details.
    proxy: {
      server: 'http://t.16yun.cn:31111',
      username: 'username',
      password: 'password',
    },
  });

  try {
    const page = await browser.newPage();

    // Subscribe to 'request' and 'response' events.
    page.on('request', (request) =>
      console.log('>>', request.method(), request.url()));
    page.on('response', (response) =>
      console.log('<<', response.status(), response.url()));

    await page.goto('https://httpbin.org/ip');
  } finally {
    // Close the browser even when navigation fails, so no browser process
    // is left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
Puppeteer
// Puppeteer example: launch the browser behind the authenticated proxy, open
// a page, print its cookies and save a full-page screenshot.
const puppeteer = require('puppeteer');

// Proxy server (product site: www.16yun.cn)
const proxyServer = 'http://t.16yun.cn:31111';

// Proxy credentials - replace with your own.
const username = 'username';
const password = 'password';

(async () => {
  const browser = await puppeteer.launch({
    args: [
      `--proxy-server=${proxyServer}`,
      '--no-sandbox',
      '--disable-setuid-sandbox',
    ],
  });

  try {
    const page = await browser.newPage();

    // Supply the proxy credentials before navigating.
    await page.authenticate({ username, password });
    await page.goto('https://www.baidu.com');

    const cookies = await page.cookies();
    // console.log is synchronous, so it is not awaited.
    console.log(cookies);

    await page.setViewport({ width: 320, height: 480 });
    await page.screenshot({ path: '/screenshots/full.png', fullPage: true });
  } finally {
    // Close the browser even when a step above fails, so no browser process
    // is left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});