<?php
/**
 * Curl wrapper with goodies
 * - can switch the user agent (UA) to test browser-sensitive features
 * - can simulate/establish a Tor connection
 *
 * @license MIT
 * @cakephp 2.0
 * 2011-07-16 ms
 */
  11. class CurlLib {
  12. public $settings = array(
  13. 'CURLOPT_SSL_VERIFYPEER' => false,
  14. );
  15. protected $Ch = null;
  16. public $cookie = null;
  17. public $tor = '127.0.0.1:9050';
  18. public $header = array();
  19. public $persistentHeader = array();
  20. protected $lastUrl = '';
  21. public $ua = array(
  22. 'Firefox' => array(
  23. 'Firefox/3.0.2 Linux' => 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.0.2) Gecko/2008091700 SUSE/3.0.2-5.2 Firefox/3.0.2'
  24. ),
  25. 'IE' => array(
  26. '6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  27. '7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
  28. '8' => 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)'
  29. ),
  30. 'Konqueror' => array(
  31. 'Konqueror/3.5' => 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko).'
  32. ),
  33. 'Opera' => array(
  34. '9.60' => 'Opera/9.60 (X11; Linux i686; U; de) Presto/2.1.1',
  35. '10' => 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.2.15 Version/10.00'
  36. ),
  37. 'Safari' => array(
  38. '1.0' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/85.7 (KHTML, like Gecko) Safari/85.7',
  39. '1.2' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.8',
  40. '3.3' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/522.15.5 (KHTML, like Gecko) Version/3.0.3 Safari/522.15.5'
  41. ),
  42. 'Chrome' => array(
  43. '8' => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML, like Gecko) Ubuntu/10.10 Chrome/8.1.0.0 Safari/540.0'
  44. ),
  45. 'Bots' => array(
  46. 'Google' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
  47. )
  48. );
  49. public function set($key, $value) {
  50. return curl_setopt($this->Ch, $key, $value);
  51. }
  52. public function __construct($timeout = 5, $cookie = true) {
  53. $this->cookie = null;
  54. if ($cookie !== false) {
  55. if ($cookie === true) {
  56. $this->cookie['file'] = tempnam(sys_get_temp_dir(), 'curl_cookie');
  57. $this->cookie['remove'] = true;
  58. } else {
  59. $this->cookie['remove'] = false;
  60. $this->cookie['file'] = $cookie;
  61. }
  62. }
  63. $this->Ch = curl_init();
  64. if ($this->cookie !== false) {
  65. $this->set(CURLOPT_COOKIEJAR, $this->cookie['file']);
  66. $this->set(CURLOPT_COOKIEFILE, $this->cookie['file']);
  67. }
  68. $this->set(CURLOPT_FOLLOWLOCATION, true);
  69. $this->set(CURLOPT_ENCODING, "");
  70. $this->set(CURLOPT_RETURNTRANSFER, true);
  71. $this->set(CURLOPT_AUTOREFERER, true);
  72. $this->set(CURLOPT_CONNECTTIMEOUT, $timeout);
  73. $this->set(CURLOPT_TIMEOUT, $timeout);
  74. $this->set(CURLOPT_MAXREDIRS, 10);
  75. $this->setUserAgent();
  76. }
  77. public function setUserAgent($ua = 'Firefox', $version = null) {
  78. if (isset($this->userAgents[$ua])) {
  79. if ($version !== null && isset($this->userAgents[$ua][$version])) {
  80. $ua = $this->userAgents[$ua][$version];
  81. } else {
  82. $ua = array_values($this->userAgents[$ua]);
  83. krsort($ua);
  84. list($ua) = $ua;
  85. }
  86. }
  87. return $this->set(CURLOPT_USERAGENT, $ua);
  88. }
  89. //TODO: use Dummy.FakerLib instead
  90. public function randomizeUserAgent() {
  91. //list of browsers
  92. $agentBrowser = array(
  93. 'Firefox',
  94. 'Safari',
  95. 'Opera',
  96. 'Flock',
  97. 'Internet Explorer',
  98. 'Seamonkey',
  99. 'Konqueror',
  100. 'GoogleBot'
  101. );
  102. //list of operating systems
  103. $agentOS = array(
  104. 'Windows 3.1',
  105. 'Windows 95',
  106. 'Windows 98',
  107. 'Windows 2000',
  108. 'Windows NT',
  109. 'Windows XP',
  110. 'Windows Vista',
  111. 'Redhat Linux',
  112. 'Ubuntu',
  113. 'Fedora',
  114. 'AmigaOS',
  115. 'OS 10.5'
  116. );
  117. //randomly generate UserAgent
  118. $ua = $agentBrowser[rand(0,count($agentBrowser)-1)].'/'.rand(1,8).'.'.rand(0,9).' (' .$agentOS[rand(0,count($agentOS)-1)].' '.rand(1,7).'.'.rand(0,9).'; en-US;)';
  119. $this->setUserAgent($ua);
  120. return $ua;
  121. }
  122. public function setSocks5Proxy($proxy = false) {
  123. $this->set(CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
  124. if ($proxy) {
  125. return $this->set(CURLOPT_PROXY, $proxy);
  126. } else {
  127. return $this->set(CURLOPT_PROXY, false);
  128. }
  129. }
  130. public function setHttpProxy($proxy = false) {
  131. $this->set(CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
  132. if ($proxy) {
  133. return $this->set(CURLOPT_PROXY, $proxy);
  134. } else {
  135. return $this->set(CURLOPT_PROXY, false);
  136. }
  137. }
  138. public function setTor($tor = null) {
  139. if ($tor === null) {
  140. $tor = $this->tor;
  141. }
  142. return $this->setSocks5Proxy($tor);
  143. }
  144. public function setHeader($key, $header, $persistent = false) {
  145. if ($persistent) {
  146. $this->persistentHeader[$key] = $header;
  147. } else {
  148. $this->header[$key] = $header;
  149. }
  150. }
  151. public function unsetHeader($key, $persistent = false) {
  152. if ($persistent) {
  153. unset($this->persistentHeader[$key]);
  154. } else {
  155. unset($this->header[$key]);
  156. }
  157. }
  158. public function exec() {
  159. $header = array();
  160. foreach ($this->header as $tk => $tv) {
  161. $header[] = $tk . ': ' . $tv;
  162. }
  163. $this->set(CURLOPT_HTTPHEADER, $header);
  164. $this->header = $this->persistentHeader;
  165. $content = curl_exec($this->Ch);
  166. $info = curl_getinfo($this->Ch);
  167. return array($content, $info);
  168. }
  169. /**
  170. * get/set referer
  171. *
  172. * @param url
  173. * @return mixed
  174. * 2012-06-06 ms
  175. */
  176. public function referer($url = null) {
  177. if ($url === null) {
  178. if ($this->lastUrl !== null) {
  179. return $this->set(CURLOPT_REFERER, $this->lastUrl);
  180. }
  181. } else {
  182. $this->lastUrl = null;
  183. return $this->set(CURLOPT_REFERER, $url);
  184. }
  185. return false;
  186. }
  187. protected function _prepareData($url, $getdata = array(), $data = array()) {
  188. if (strpos($url, '?') === false && ( //If Url has not a "?" in it
  189. (is_array($getdata) && !empty($getdata)) || //And $getdata is array and has more than one value
  190. (!is_array($getdata) && strlen($getdata) > 0))) { //or its a a string and is longer than 0
  191. $url .= '?';
  192. }
  193. $data = array(
  194. $getdata,
  195. $data
  196. );
  197. foreach ($data as $i => $part) {
  198. if (is_array($part)) {
  199. $string = '';
  200. foreach ($part as $key => $value) {
  201. $string .= urlencode($key) . '=' . urlencode($value) . '&';
  202. }
  203. $part = rtrim($string, '&');
  204. } else {
  205. $part = urlencode($part);
  206. }
  207. $data[$i] = $part;
  208. }
  209. $data[0] = $url . $data[0];
  210. return $data;
  211. }
  212. public function post($url, $data = array(), $getdata = array()) {
  213. $this->referer();
  214. $this->set(CURLOPT_POST, true);
  215. $data = $this->_prepareData($url, $getdata, $data);
  216. $this->set(CURLOPT_URL, $data[0]);
  217. $this->set(CURLOPT_POSTFIELDS, $data[1]);
  218. return $this->exec();
  219. }
  220. public function get($url, $data = array()) {
  221. $this->referer();
  222. $this->set(CURLOPT_HTTPGET, true);
  223. $data = $this->_prepareData($url, $data);
  224. $this->set(CURLOPT_URL, $data[0]);
  225. $this->set(CURLOPT_SSL_VERIFYPEER, false);
  226. $this->lastUrl = $url;
  227. return $this->exec();
  228. }
  229. public function __destruct() {
  230. if ($this->cookie !== false) {
  231. if (isset($this->cookie['handle'])) {
  232. fclose($this->cookie['handle']);
  233. }
  234. if ($this->cookie['remove']) {
  235. unlink($this->cookie['file']);
  236. }
  237. }
  238. curl_close($this->Ch);
  239. }
  240. }