JavaScript/Node.js 爬虫代理示例

[!NOTE]代码示例说明

  1. 代码样例不能直接运行,请替换成您自己的代理信息。
  2. 在不同编程语言的代码示例中,需注意其环境版本。
  3. 示例代码使用遇到问题请联系售后客服,我们会为您提供技术支持。

Node.js

=== "http"


const http = require("http");
const url = require("url");

// Target page to fetch through the proxy.
const targetUrl = "http://httpbin.org/ip";

// Parse with the WHATWG URL API; the legacy url.parse() is deprecated.
// A URL object exposes the same `hostname` / `host` properties as the
// legacy parse result.
const urlParsed = new URL(targetUrl);

// Proxy server endpoint (vendor site: www.16yun.cn).
const proxyHost = "t.16yun.cn";
const proxyPort = "36600";

// Generate a random proxy tunnel id.
// Math.random() is used so that each run gets a different value; a generator
// with a fixed seed would produce the identical tunnel id on every run.

/**
 * Return a pseudo-random float.
 * @returns {number} A value in the range [0, 1).
 */
function random() {
    return Math.random();
}
const tunnel = random() * 100;
    
// Proxy credentials.
const proxyUser = "username";
const proxyPass = "password";

// Basic-auth token for the Proxy-Authorization header.
// Buffer.from() is a static factory, so it is called without `new`.
const base64 = Buffer.from(`${proxyUser}:${proxyPass}`).toString("base64");

// The request is addressed to the proxy (host/port) and carries the absolute
// target URL in `path`.
const options = {
    host: proxyHost,
    port: proxyPort,
    path: targetUrl,
    method: "GET",
    headers: {
        // `host` (unlike `hostname`) keeps a non-default port, which the
        // Host header must include.
        "Host": urlParsed.host,
        "Proxy-Tunnel": tunnel,
        "Proxy-Authorization": "Basic " + base64
    }
};

http.request(options, function (res) {
    console.log("got response: " + res.statusCode);
    // Stream the response body straight to standard output.
    res.pipe(process.stdout);
}).on("error", function (err) {
    console.log(err);
}).end();

=== "https"


const https = require("https");
const url = require("url");
const httpsProxyAgent = require('https-proxy-agent');

// Target page to fetch through the proxy.
const targetUrl = "https://httpbin.org/ip";

// Parsed with the WHATWG URL API; the legacy url.parse() is deprecated.
const urlParsed = new URL(targetUrl);

// Proxy server endpoint (vendor site: www.16yun.cn).
const proxyHost = "t.16yun.cn";
const proxyPort = "31111";

// Proxy credentials.
const proxyUser = "username";
const proxyPass = "password";

// Percent-encode the credentials so characters such as "@", ":" or "/" in a
// real username or password cannot break the proxy URL.
const proxyUrl = `http://${encodeURIComponent(proxyUser)}:${encodeURIComponent(proxyPass)}@${proxyHost}:${proxyPort}`;

// NOTE(review): `new httpsProxyAgent(...)` assumes the installed
// https-proxy-agent version exports the agent constructor directly — confirm
// against the version this project depends on.
const agent = new httpsProxyAgent(proxyUrl);

// The agent travels in its own options object rather than being attached to
// the parsed URL.
const options = { agent };

https.request(urlParsed, options, function (res) {
    console.log("got response: " + res.statusCode);
    // Stream the response body straight to standard output.
    res.pipe(process.stdout);
}).on("error", function (err) {
    console.log(err);
}).end();

=== "https(Proxy-Tunnel)"


const https = require("https");
const url = require("url");
const httpsProxyAgent = require('https-proxy-agent');

// Target page to fetch through the proxy.
const targetUrl = "https://httpbin.org/ip";

// Parsed with the WHATWG URL API; the legacy url.parse() is deprecated.
const urlParsed = new URL(targetUrl);

// Proxy server endpoint (vendor site: www.16yun.cn).
const proxyHost = "t.16yun.cn";
const proxyPort = "31111";

// Proxy credentials.
const proxyUser = "username";
const proxyPass = "password";

// Percent-encode the credentials so characters such as "@", ":" or "/" in a
// real username or password cannot break the proxy URL.
const proxy_url = `http://${encodeURIComponent(proxyUser)}:${encodeURIComponent(proxyPass)}@${proxyHost}:${proxyPort}`;

// The agent is configured from a plain parsed-URL object so that a `headers`
// entry can be added to it before construction.
// NOTE(review): passing an options object (and calling the module export with
// `new`) depends on the installed https-proxy-agent version — confirm.
const agent_options = url.parse(proxy_url);

// Fixed tunnel id: all ten requests below share the same Proxy-Tunnel value.
agent_options.headers = { "Proxy-Tunnel": "1" };
const agent = new httpsProxyAgent(agent_options);

// The agent travels in its own options object rather than being attached to
// the parsed target URL.
const options = { agent };

for (let i = 0; i < 10; i++) {
    https.request(urlParsed, options, function (res) {
        console.log("got response: " + res.statusCode);
        // Stream the response body straight to standard output.
        res.pipe(process.stdout);
    }).on("error", function (err) {
        console.log(err);
    }).end();
}

request


const request = require("request");

// Target page to fetch through the proxy.
const targetUrl = "http://httpbin.org/ip";

// Proxy server endpoint (vendor site: www.16yun.cn).
const proxyHost = "t.16yun.cn";
const proxyPort = "31111";

// Proxy credentials.
const proxyUser = "username";
const proxyPass = "password";

// Percent-encode the credentials so characters such as "@", ":" or "/" in a
// real username or password cannot break the proxy URL.
const proxyUrl = `http://${encodeURIComponent(proxyUser)}:${encodeURIComponent(proxyPass)}@${proxyHost}:${proxyPort}`;

// Request factory with the proxy preconfigured for every call.
const proxiedRequest = request.defaults({ proxy: proxyUrl });

const options = {
  url: targetUrl,
  // Placeholder for extra request headers.
  headers: {}
};

proxiedRequest
    .get(options, function (err, res, body) {
        // On failure `res` is undefined, so bail out before touching it; the
        // "error" listener below is what reports the error.
        if (err) {
            return;
        }
        console.log("got response: " + res.statusCode);
    })
    .on("error", function (err) {
        console.log(err);
    })
;

superagent

const request = require("superagent");

require("superagent-proxy")(request);

// Target page to fetch through the proxy.
const targetUrl = "http://httpbin.org/ip";

// Proxy server endpoint (vendor site: www.16yun.cn).
const proxyHost = "t.16yun.cn";
const proxyPort = 31111;

// Proxy credentials.
const proxyUser = "username";
const proxyPass = "password";

// Percent-encode the credentials so characters such as "@", ":" or "/" in a
// real username or password cannot break the proxy URL.
const proxyUrl = `http://${encodeURIComponent(proxyUser)}:${encodeURIComponent(proxyPass)}@${proxyHost}:${proxyPort}`;

request
    .get(targetUrl)
    .proxy(proxyUrl)
    .end(function onResponse(err, res) {
        // Report the failure and stop; `res` is not inspected on error.
        if (err) {
            return console.log(err);
        }

        console.log(res.status, res.headers);
        console.log(res.text);
    })
;

axios

const axios = require('axios');

// Target pages. Only the plain-HTTP one is requested below; the HTTPS one is
// declared but unused (see the note at the end of this sample).
const targetUrl = "http://httpbin.org/ip";
const targetHttpsUrl = "https://httpbin.org/ip";

// Proxy server endpoint (vendor site: www.16yun.cn).
const proxyHost = "t.16yun.cn";
const proxyPort = 31111;

// Proxy credentials.
const proxyUser = "username";
const proxyPass = "password";

// Proxy settings handed to axios through its `proxy` request option.
const proxy = {
    host: proxyHost,
    port: proxyPort,
    auth: { username: proxyUser, password: proxyPass }
};

const requestConfig = { proxy };

axios.get(targetUrl, requestConfig)
    .then((response) => {
        // Success: print the response payload.
        console.log(response.data);
    })
    .catch((error) => {
        // Failure: print the error.
        console.log(error);
    })
    .finally(() => {
        // Runs after either outcome; intentionally empty.
    });

// axios has a bug when the target is an HTTPS site, so using it for HTTPS
// targets is not recommended.
// For details see https://github.com/axios/axios/issues/4531

浏览器与自动化(JS)

PhantomJS

以参数方式传递代理信息,示例如下:

phantomjs --proxy-auth=USERNAME:PASSWORD --proxy=http://t.16yun.cn:31111 --ignore-ssl-errors=true http-demo.js

http-demo.js 内容如下:


    // Load the PhantomJS webpage module and create the page used by this script.
    var webpage = require('webpage');
    var page = webpage.create();

    // Override the user agent string sent by the page.
    page.settings.userAgent = 'Mozilla/5.0 UCBrowser/9.4.1.362 U3/0.8.0 Mobile Safari/533.1';

    // Echo the value now stored in the page settings.
    console.log('The user agent is ' + page.settings.userAgent);
    
    // Generate a random proxy tunnel id.
    // Math.random() is used so that each run gets a different value; a
    // generator with a fixed seed would produce the identical tunnel id on
    // every run.

    /**
     * Return a pseudo-random float.
     * @returns {number} A value in the range [0, 1).
     */
    function random() {
        return Math.random();
    }
    const tunnel = random() * 100;

    // Disabled: uncomment to send the tunnel id as a custom request header.
    //page.customHeaders = {
    //  "proxy-tunnel": tunnel,
    //};

    // Print every header of each received resource as "name: value".
    page.onResourceReceived = function (resource) {
      var headers = resource.headers;
      var index = 0;
      while (index < headers.length) {
        var header = headers[index];
        console.log(header.name + ': ' + header.value);
        index += 1;
      }
    };

    // Load the test page, dump the load status and page content, then exit
    // PhantomJS three seconds later.
    page.open("http://httpbin.org/ip", {}, function (status) {
      var exitPhantom = function () {
        phantom.exit();
      };

      console.log('status> ' + status);
      console.log(page.content);
      setTimeout(exitPhantom, 3000);
    });

CasperJS

以参数方式传递代理信息,示例如下:

casperjs --proxy-auth=USERNAME:PASSWORD --proxy=http://t.16yun.cn:31111  --ignore-ssl-errors=true --ssl-protocol=any http-demo.js

http-demo.js 内容如下:


    var casper = require('casper').create();

    // Generate a random proxy tunnel id.
    // Math.random() is used so that each run gets a different value; a
    // generator with a fixed seed would produce the identical tunnel id on
    // every run.

    /**
     * Return a pseudo-random float.
     * @returns {number} A value in the range [0, 1).
     */
    function random() {
        return Math.random();
    }
    const tunnel = random() * 1000;

    // Install the request headers, including the tunnel id, on the underlying
    // page when Casper emits its 'started' event.
    casper.on('started', function onStarted() {
        var requestHeaders = {};
        requestHeaders["User-Agent"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0";
        requestHeaders["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        requestHeaders["Accept-Language"] = "en-US,en;q=0.5";
        requestHeaders["Accept-Encoding"] = "gzip, deflate";
        requestHeaders["Connection"] = "keep-alive";
        requestHeaders["Proxy-Tunnel"] = tunnel;
        this.page.customHeaders = requestHeaders;
    });

    // Open the page that echoes back the request headers it received.
    casper.start("http://httpbin.org/headers");

    // Print the content of the loaded page.
    casper.then(function printFirstPage() {
        var content = this.page.content;
        console.log('First Page: ' + content);
    });

    casper.run();
    

playwright

const { chromium, webkit, firefox } = require('playwright');

// Launch Chromium behind the authenticated proxy, load one page while logging
// every request and response, and always close the browser afterwards.
(async () => {
    const browser = await chromium.launch({
      proxy: {
        server: 'http://t.16yun.cn:31111',
        username: 'username',
        password: 'password'
      }
    });

    try {
      const page = await browser.newPage();

      // Subscribe to 'request' and 'response' events.
      page.on('request', request =>
          console.log('>>', request.method(), request.url()));
      page.on('response', response =>
          console.log('<<', response.status(), response.url()));
      await page.goto('https://httpbin.org/ip');
    } finally {
      // Close the browser even when navigation fails, so no browser process
      // is left running.
      await browser.close();
    }
})().catch((err) => {
    // Report the failure instead of leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
});

Puppeteer

    const puppeteer = require('puppeteer');
// Proxy server endpoint (vendor site: www.16yun.cn).
    const proxyServer = 'http://t.16yun.cn:31111';

    // Proxy credentials, supplied to the page through page.authenticate().
    const username = 'username';
    const password = 'password';

    // Launch the browser behind the proxy, open a page, print its cookies and
    // save a full-page screenshot; the browser is always closed afterwards.
    (async() => {
        const browser = await puppeteer.launch({
            args: ['--proxy-server=' + proxyServer, '--no-sandbox', '--disable-setuid-sandbox']});

        try {
            const page = await browser.newPage();
            await page.authenticate({ username, password });
            await page.goto('https://www.baidu.com');
            const cookies = await page.cookies();
            // console.log is synchronous, so it is not awaited.
            console.log(cookies);
            await page.setViewport({width: 320, height: 480});
            // NOTE(review): absolute path at the filesystem root — the
            // directory presumably has to exist and be writable; confirm.
            await page.screenshot({path: '/screenshots/full.png', fullPage: true});
        } finally {
            // Close the browser even when a step above throws, so no browser
            // process is left running.
            await browser.close();
        }
    })().catch((err) => {
        // Report the failure instead of leaving an unhandled rejection.
        console.error(err);
        process.exitCode = 1;
    });