PHP 爬虫代理示例

[!NOTE]代码示例说明

  1. 代码样例不能直接运行,请替换成您自己的代理信息。
  2. 在不同编程语言的代码示例中,需注意其环境版本。
  3. 示例代码使用遇到问题请联系售后客服,我们会为您提供技术支持。

=== "stream"

<?php
    // 要访问的目标页面
    $url = "http://httpbin.org/ip";
    $urls = "https://httpbin.org/ip";

    // 代理服务器(产品官网 www.16yun.cn)
    define("PROXY_SERVER", "tcp://t.16yun.cn:31111");

    // 代理身份信息
    define("PROXY_USER", "username");
    define("PROXY_PASS", "password");

    $proxyAuth = base64_encode(PROXY_USER . ":" . PROXY_PASS);

    // 设置 Proxy tunnel
    $tunnel = rand(1,10000);

    $headers = implode("\r\n", [
        "Proxy-Authorization: Basic {$proxyAuth}",
        "Proxy-Tunnel: ${tunnel}",
    ]);
    $sniServer = parse_url($urls, PHP_URL_HOST);
    $options = [
        "http" => [
            "proxy"  => PROXY_SERVER,
            "header" => $headers,
            "method" => "GET",
            'request_fulluri' => true,
        ],
        'ssl' => array(
                'SNI_enabled' => true, // Disable SNI for https over http proxies
                'SNI_server_name' => $sniServer
        )
    ];
    print($url);
    $context = stream_context_create($options);
    $result = file_get_contents($url, false, $context);
    var_dump($result);

    // 访问 HTTPS 页面
    print($urls);
    $context = stream_context_create($options);
    $result = file_get_contents($urls, false, $context);
    var_dump($result);
?>

=== "curl"

<?php

    function curlFile($url,$proxy_ip,$proxy_port,$loginpassw)
    {
        //$loginpassw = 'username:password';
        //$proxy_ip = 't.16yun.cn';
        //$proxy_port = '31111';
        //$url = 'https://httpbin.org/ip';
    
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_PROXYPORT, $proxy_port);
        curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP');
        curl_setopt($ch, CURLOPT_PROXY, $proxy_ip);
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, $loginpassw);

        // curl used to include a list of accepted CAs, but no longer bundles ANY CA certs. So by default it'll reject all SSL certificates as unverifiable.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);

        $data = curl_exec($ch);
        curl_close($ch);
    
        return $data;
    }
    
    $data = curlFile('https://httpbin.org/ip','t.16yun.cn',31111,'username:password');
    print($data);

?>

=== "GuzzleHttp"

<?php    
    namespace App\Console\Commands;    
    use Illuminate\Console\Command;

    class Test16Proxy extends Command
    {
        /**
         * The name and signature of the console command.
         *
         * @var string
         */
        protected $signature = 'test:16proxy';

        /**
         * The console command description.
         *
         * @var string
         */
        protected $description = 'Command description';

        /**
         * Create a new command instance.
         *
         * @return void
         */
        public function __construct()
        {
            parent::__construct();
        }

        /**
         * Execute the console command.
         *
         * @return mixed
         */
        public function handle()
        {
            $client = new \GuzzleHttp\Client();
            // 要访问的目标页面
            $targetUrl = "http://httpbin.org/ip";

            // 代理服务器(产品官网 www.16yun.cn)
            define("PROXY_SERVER", "t.16yun.cn:31111");

            // 代理身份信息
            define("PROXY_USER", "username");
            define("PROXY_PASS", "password");

            $proxyAuth = base64_encode(PROXY_USER . ":" . PROXY_PASS);

            $options = [
                "proxy"  => PROXY_SERVER,
                "headers" => [
                    "Proxy-Authorization" => "Basic " . $proxyAuth
                ]
            ];
            //print_r($options);
            $result = $client->request('GET', $targetUrl, $options);
            var_dump($result->getBody()->getContents());
        }
    }
?>