<?php
/**
 * Curl wrapper with goodies
 * - can switch the UA to test certain browser sensitive features
 * - can simulate/establish a Tor connection
 *
 * @license http://opensource.org/licenses/mit-license.php MIT
 */
class CurlLib {

    /**
     * Named settings.
     *
     * NOTE(review): nothing in this class reads this array; it is kept only
     * because it is part of the public surface.
     *
     * @var array
     */
    public $settings = [
        'CURLOPT_SSL_VERIFYPEER' => false,
    ];

    /**
     * The cURL handle created in the constructor.
     *
     * @var mixed
     */
    protected $Ch = null;

    /**
     * Cookie storage: null when cookies are disabled, otherwise an array with
     * the keys 'file' (path of the cookie jar) and 'remove' (whether the
     * destructor deletes that file).
     *
     * @var array|null
     */
    public $cookie = null;

    /**
     * Default host:port used by setTor().
     *
     * @var string
     */
    public $tor = '127.0.0.1:9050';

    /**
     * Headers for the next request (name => value).
     *
     * @var array
     */
    public $header = [];

    /**
     * Headers sent with every request (name => value).
     *
     * @var array
     */
    public $persistentHeader = [];

    /**
     * URL of the last get() call; null after an explicit referer($url).
     *
     * @var string|null
     */
    protected $lastUrl = '';

    /**
     * Known user agent strings, grouped by browser name and then by version.
     *
     * @var array
     */
    public $ua = [
        'Firefox' => [
            'Firefox/3.0.2 Linux' => 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.0.2) Gecko/2008091700 SUSE/3.0.2-5.2 Firefox/3.0.2'
        ],
        'IE' => [
            '6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
            '7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
            '8' => 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)'
        ],
        'Konqueror' => [
            'Konqueror/3.5' => 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko).'
        ],
        'Opera' => [
            '9.60' => 'Opera/9.60 (X11; Linux i686; U; de) Presto/2.1.1',
            '10' => 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.2.15 Version/10.00'
        ],
        'Safari' => [
            '1.0' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/85.7 (KHTML, like Gecko) Safari/85.7',
            '1.2' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.8',
            '3.3' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/522.15.5 (KHTML, like Gecko) Version/3.0.3 Safari/522.15.5'
        ],
        'Chrome' => [
            '8' => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML, like Gecko) Ubuntu/10.10 Chrome/8.1.0.0 Safari/540.0'
        ],
        'Bots' => [
            'Google' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
        ]
    ];

    /**
     * Set a single option on the underlying cURL handle.
     *
     * @param int $key A CURLOPT_* constant
     * @param mixed $value Value for that option
     * @return bool Result of curl_setopt()
     */
    public function set($key, $value) {
        return curl_setopt($this->Ch, $key, $value);
    }

    /**
     * Create the handle and apply the default options.
     *
     * @param int $timeout Seconds, used for both connect and total timeout
     * @param bool|string $cookie true: temporary cookie file that is removed
     *   on destruction; false: no cookie file; string: path of a cookie file
     *   that is kept
     */
    public function __construct($timeout = 5, $cookie = true) {
        $this->cookie = null;
        if ($cookie === true) {
            $file = tempnam(sys_get_temp_dir(), 'curl_cookie');
            // tempnam() returns false on failure; run without a cookie file then.
            if ($file !== false) {
                $this->cookie = ['file' => $file, 'remove' => true];
            }
        } elseif ($cookie !== false && $cookie !== null) {
            $this->cookie = ['remove' => false, 'file' => $cookie];
        }

        $this->Ch = curl_init();

        // Only touch the cookie options when a cookie file was configured;
        // the disabled state is null, not false.
        if (is_array($this->cookie)) {
            $this->set(CURLOPT_COOKIEJAR, $this->cookie['file']);
            $this->set(CURLOPT_COOKIEFILE, $this->cookie['file']);
        }

        $this->set(CURLOPT_FOLLOWLOCATION, true);
        $this->set(CURLOPT_ENCODING, "");
        $this->set(CURLOPT_RETURNTRANSFER, true);
        $this->set(CURLOPT_AUTOREFERER, true);
        $this->set(CURLOPT_CONNECTTIMEOUT, $timeout);
        $this->set(CURLOPT_TIMEOUT, $timeout);
        $this->set(CURLOPT_MAXREDIRS, 10);
        $this->setUserAgent();
    }

    /**
     * Set the User-Agent header.
     *
     * @param string $ua A browser name from $this->ua, or a literal UA string
     * @param string|null $version Version key below that browser; when null
     *   or unknown the highest listed version is used
     * @return bool Result of curl_setopt()
     */
    public function setUserAgent($ua = 'Firefox', $version = null) {
        return $this->set(CURLOPT_USERAGENT, $this->_resolveUserAgent($ua, $version));
    }

    /**
     * Translate a browser name (and optional version) into a UA string.
     *
     * @param string $ua Browser name from $this->ua, or a literal UA string
     * @param string|null $version Version key below that browser
     * @return string The matching UA string, or $ua unchanged if it is not a
     *   known browser name
     */
    protected function _resolveUserAgent($ua, $version = null) {
        if (!isset($this->ua[$ua]) || !is_array($this->ua[$ua]) || !$this->ua[$ua]) {
            return $ua;
        }
        $versions = $this->ua[$ua];
        if ($version !== null && isset($versions[$version])) {
            return $versions[$version];
        }
        // Highest version first. Numeric-looking keys are stored as integers
        // by PHP, hence the string casts.
        uksort($versions, function ($a, $b) {
            return version_compare((string)$b, (string)$a);
        });
        return reset($versions);
    }

    //TODO: use Dummy.FakerLib instead
    /**
     * Build a random "Browser/x.y (OS x.y; en-US;)" string and set it as the
     * User-Agent.
     *
     * @return string The generated UA string
     */
    public function randomizeUserAgent() {
        //list of browsers
        $agentBrowser = [
            'Firefox',
            'Safari',
            'Opera',
            'Flock',
            'Internet Explorer',
            'Seamonkey',
            'Konqueror',
            'GoogleBot'
        ];
        //list of operating systems
        $agentOS = [
            'Windows 3.1',
            'Windows 95',
            'Windows 98',
            'Windows 2000',
            'Windows NT',
            'Windows XP',
            'Windows Vista',
            'Redhat Linux',
            'Ubuntu',
            'Fedora',
            'AmigaOS',
            'OS 10.5'
        ];
        //randomly generate UserAgent
        $browser = $agentBrowser[rand(0, count($agentBrowser) - 1)];
        $browserVersion = rand(1, 8) . '.' . rand(0, 9);
        $os = $agentOS[rand(0, count($agentOS) - 1)];
        $osVersion = rand(1, 7) . '.' . rand(0, 9);
        $ua = $browser . '/' . $browserVersion . ' (' . $os . ' ' . $osVersion . '; en-US;)';
        $this->setUserAgent($ua);
        return $ua;
    }

    /**
     * Select a proxy type and set (or clear) the proxy address.
     *
     * @param int $type A CURLPROXY_* constant
     * @param string|bool $proxy "host:port", or a falsy value to clear it
     * @return bool Result of the CURLOPT_PROXY curl_setopt() call
     */
    protected function _setProxy($type, $proxy) {
        $this->set(CURLOPT_PROXYTYPE, $type);
        return $this->set(CURLOPT_PROXY, $proxy ? $proxy : false);
    }

    /**
     * Route requests through a SOCKS5 proxy.
     *
     * @param string|bool $proxy "host:port", or false to clear the proxy
     * @return bool
     */
    public function setSocks5Proxy($proxy = false) {
        return $this->_setProxy(CURLPROXY_SOCKS5, $proxy);
    }

    /**
     * Route requests through an HTTP proxy.
     *
     * @param string|bool $proxy "host:port", or false to clear the proxy
     * @return bool
     */
    public function setHttpProxy($proxy = false) {
        return $this->_setProxy(CURLPROXY_HTTP, $proxy);
    }

    /**
     * Route requests through Tor's SOCKS5 port.
     *
     * NOTE(review): this uses CURLPROXY_SOCKS5. libcurl also documents
     * CURLPROXY_SOCKS5_HOSTNAME, where the proxy resolves host names; confirm
     * which one is wanted for Tor.
     *
     * @param string|null $tor "host:port"; defaults to $this->tor
     * @return bool
     */
    public function setTor($tor = null) {
        if ($tor === null) {
            $tor = $this->tor;
        }
        return $this->setSocks5Proxy($tor);
    }

    /**
     * Register a request header.
     *
     * @param string $key Header name
     * @param string $header Header value
     * @param bool $persistent true: send with every request; false: next
     *   request only
     * @return void
     */
    public function setHeader($key, $header, $persistent = false) {
        if ($persistent) {
            $this->persistentHeader[$key] = $header;
        } else {
            $this->header[$key] = $header;
        }
    }

    /**
     * Remove a header registered with setHeader().
     *
     * @param string $key Header name
     * @param bool $persistent Which list to remove it from
     * @return void
     */
    public function unsetHeader($key, $persistent = false) {
        if ($persistent) {
            unset($this->persistentHeader[$key]);
        } else {
            unset($this->header[$key]);
        }
    }

    /**
     * Build the "Name: value" lines for the next request.
     *
     * Persistent headers are merged in here so that they are already sent on
     * the first request after being registered; a one-off header with the
     * same name takes precedence.
     *
     * @return array List of header lines
     */
    protected function _buildHeaderLines() {
        $lines = [];
        foreach ($this->header + $this->persistentHeader as $name => $value) {
            $lines[] = $name . ': ' . $value;
        }
        return $lines;
    }

    /**
     * Send the prepared request.
     *
     * After sending, the one-off headers are reset to the persistent set.
     *
     * @return array [0 => result of curl_exec(), 1 => result of curl_getinfo()]
     */
    public function exec() {
        $this->set(CURLOPT_HTTPHEADER, $this->_buildHeaderLines());
        $this->header = $this->persistentHeader;
        $content = curl_exec($this->Ch);
        $info = curl_getinfo($this->Ch);
        return [$content, $info];
    }

    /**
     * Get/set referer
     *
     * With a URL: use it as the referer and forget the last URL, so the
     * automatic call made by the next get()/post() does not overwrite it.
     * Without a URL: use the last URL as referer, if there is one.
     *
     * @param string|null $url
     * @return bool Result of curl_setopt(), or false if nothing was set
     */
    public function referer($url = null) {
        if ($url !== null) {
            $this->lastUrl = null;
            return $this->set(CURLOPT_REFERER, $url);
        }
        if ($this->lastUrl !== null) {
            return $this->set(CURLOPT_REFERER, $this->lastUrl);
        }
        return false;
    }

    /**
     * Encode GET and POST data and attach the GET part to the URL.
     *
     * Arrays become "key=value&key=value" with both sides urlencoded. Any
     * other value is urlencoded as a whole.
     *
     * @param string $url Target URL, with or without a query string
     * @param array|string $getdata Data to append to the URL
     * @param array|string $data Request body data
     * @return array [0 => URL including the GET data, 1 => encoded body]
     */
    protected function _prepareData($url, $getdata = [], $data = []) {
        $parts = [$getdata, $data];
        foreach ($parts as $i => $part) {
            if (is_array($part)) {
                $pairs = [];
                foreach ($part as $key => $value) {
                    $pairs[] = urlencode((string)$key) . '=' . urlencode((string)$value);
                }
                $parts[$i] = implode('&', $pairs);
            } else {
                $parts[$i] = urlencode((string)$part);
            }
        }

        $query = $parts[0];
        if ($query !== '') {
            if (strpos($url, '?') === false) {
                $url .= '?';
            } elseif (!in_array(substr($url, -1), ['?', '&'], true)) {
                // The URL already has a query string: separate the new pairs.
                $url .= '&';
            }
        }
        $parts[0] = $url . $query;
        return $parts;
    }

    /**
     * Send a POST request.
     *
     * @param string $url
     * @param array|string $data Body data
     * @param array|string $getdata Data appended to the URL
     * @return array See exec()
     */
    public function post($url, $data = [], $getdata = []) {
        $this->referer();
        $this->set(CURLOPT_POST, true);
        $prepared = $this->_prepareData($url, $getdata, $data);
        $this->set(CURLOPT_URL, $prepared[0]);
        $this->set(CURLOPT_POSTFIELDS, $prepared[1]);
        return $this->exec();
    }

    /**
     * Send a GET request and remember its URL for referer().
     *
     * @param string $url
     * @param array|string $data Data appended to the URL
     * @return array See exec()
     */
    public function get($url, $data = []) {
        $this->referer();
        $this->set(CURLOPT_HTTPGET, true);
        $prepared = $this->_prepareData($url, $data);
        $this->set(CURLOPT_URL, $prepared[0]);
        // NOTE(review): this turns off TLS peer verification for the handle.
        $this->set(CURLOPT_SSL_VERIFYPEER, false);
        $this->lastUrl = $url;
        return $this->exec();
    }

    /**
     * Release the handle and remove the temporary cookie file.
     */
    public function __destruct() {
        $hasCookie = is_array($this->cookie);
        if ($hasCookie && isset($this->cookie['handle']) && is_resource($this->cookie['handle'])) {
            fclose($this->cookie['handle']);
        }

        // Release the handle before deleting the cookie file: cURL writes the
        // cookie jar when the handle is closed, so the file would otherwise
        // be written again after the unlink. From PHP 8.0 on the handle is an
        // object and is freed by dropping the reference.
        if (is_resource($this->Ch)) {
            curl_close($this->Ch);
        }
        $this->Ch = null;

        if ($hasCookie && !empty($this->cookie['remove']) && is_string($this->cookie['file']) && is_file($this->cookie['file'])) {
            unlink($this->cookie['file']);
        }
    }
}