CurlLib.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. <?php
  2. /**
  3. * Curl wrapper with goodies
  4. * - can switch the UA to test certain browser sensitive features
  5. * - can simulate/establish tor connection
  6. *
  7. * @license MIT
  8. * @cakephp 2.x
  9. */
  class CurlLib {

      /**
       * Option defaults keyed by cURL constant name.
       *
       * NOTE(review): no method in this class reads this property; it is kept
       * only because it is public and callers or subclasses may reference it.
       *
       * @var array
       */
      public $settings = array(
          'CURLOPT_SSL_VERIFYPEER' => false,
      );

      /**
       * The cURL handle all requests of this instance go through.
       *
       * @var mixed
       */
      protected $Ch = null;

      /**
       * Cookie jar state: null when cookies are disabled, otherwise an array
       * with the keys 'file' (jar path) and 'remove' (delete jar on destruct).
       *
       * @var array|null
       */
      public $cookie = null;

      /**
       * Default "host:port" used by setTor().
       *
       * @var string
       */
      public $tor = '127.0.0.1:9050';

      /**
       * Headers (name => value) queued for the next request.
       *
       * @var array
       */
      public $header = array();

      /**
       * Headers (name => value) that are re-queued after every request.
       *
       * @var array
       */
      public $persistentHeader = array();

      /**
       * URL of the last get()/post() call, used as automatic referer.
       * null means an explicit referer was set and must not be overwritten.
       *
       * @var string|null
       */
      protected $lastUrl = '';

      /**
       * Known user agent strings, grouped by browser name and keyed by version.
       *
       * @var array
       */
      public $ua = array(
          'Firefox' => array(
              'Firefox/3.0.2 Linux' => 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.0.2) Gecko/2008091700 SUSE/3.0.2-5.2 Firefox/3.0.2'
          ),
          'IE' => array(
              '6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
              '7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
              '8' => 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)'
          ),
          'Konqueror' => array(
              'Konqueror/3.5' => 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko).'
          ),
          'Opera' => array(
              '9.60' => 'Opera/9.60 (X11; Linux i686; U; de) Presto/2.1.1',
              '10' => 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.2.15 Version/10.00'
          ),
          'Safari' => array(
              '1.0' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/85.7 (KHTML, like Gecko) Safari/85.7',
              '1.2' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.8',
              '3.3' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/522.15.5 (KHTML, like Gecko) Version/3.0.3 Safari/522.15.5'
          ),
          'Chrome' => array(
              '8' => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML, like Gecko) Ubuntu/10.10 Chrome/8.1.0.0 Safari/540.0'
          ),
          'Bots' => array(
              'Google' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
          )
      );

      /**
       * Set a single cURL option on the underlying handle.
       *
       * @param int $key A CURLOPT_* constant
       * @param mixed $value Value for that option
       * @return bool Result of curl_setopt()
       */
      public function set($key, $value) {
          return curl_setopt($this->Ch, $key, $value);
      }

      /**
       * Create the handle and apply the default options.
       *
       * @param int $timeout Used for both the connect and the total timeout (seconds)
       * @param bool|string $cookie true = temporary jar that is deleted on destruct,
       *   false = no cookie jar, anything else = path of a jar file that is kept
       */
      public function __construct($timeout = 5, $cookie = true) {
          $this->cookie = null;
          if ($cookie === true) {
              $file = tempnam(sys_get_temp_dir(), 'curl_cookie');
              // If no temp file could be created, run without a jar instead of
              // handing curl a boolean as file name.
              if ($file !== false) {
                  $this->cookie = array('file' => $file, 'remove' => true);
              }
          } elseif ($cookie !== false) {
              $this->cookie = array('remove' => false, 'file' => $cookie);
          }

          $this->Ch = curl_init();

          // The "disabled" state is null (see above), so that is what has to be tested.
          if ($this->cookie !== null) {
              $this->set(CURLOPT_COOKIEJAR, $this->cookie['file']);
              $this->set(CURLOPT_COOKIEFILE, $this->cookie['file']);
          }
          $this->set(CURLOPT_FOLLOWLOCATION, true);
          $this->set(CURLOPT_ENCODING, "");
          $this->set(CURLOPT_RETURNTRANSFER, true);
          $this->set(CURLOPT_AUTOREFERER, true);
          $this->set(CURLOPT_CONNECTTIMEOUT, $timeout);
          $this->set(CURLOPT_TIMEOUT, $timeout);
          $this->set(CURLOPT_MAXREDIRS, 10);
          $this->setUserAgent();
      }

      /**
       * Set the User-Agent header.
       *
       * @param string $ua Either a group name from $ua (e.g. 'IE') or a literal UA string
       * @param string|null $version Version key inside the group; ignored for literal strings
       * @return bool Result of curl_setopt()
       */
      public function setUserAgent($ua = 'Firefox', $version = null) {
          return $this->set(CURLOPT_USERAGENT, $this->_resolveUserAgent($ua, $version));
      }

      /**
       * Translate a group name (plus optional version) into a full UA string.
       *
       * Anything that is not a key of $ua is returned unchanged, so literal UA
       * strings pass straight through. Without a (known) version the last entry
       * of the group is used, which is the newest one in the built-in table.
       *
       * @param mixed $ua Group name or literal UA string
       * @param string|null $version Version key inside the group
       * @return mixed The resolved UA string, or $ua itself when it is not a known group
       */
      protected function _resolveUserAgent($ua, $version = null) {
          if (!is_string($ua) || !isset($this->ua[$ua])) {
              return $ua;
          }
          $group = $this->ua[$ua];
          if (!is_array($group) || empty($group)) {
              return $ua;
          }
          if ($version !== null && isset($group[$version])) {
              return $group[$version];
          }
          // $group is a copy, so moving its internal pointer does not touch $this->ua.
          return end($group);
      }

      //TODO: use Dummy.FakerLib instead
      /**
       * Build a random "Browser/x.y (OS x.y; en-US;)" string and use it as User-Agent.
       *
       * @return string The generated user agent
       */
      public function randomizeUserAgent() {
          //list of browsers
          $agentBrowser = array(
              'Firefox',
              'Safari',
              'Opera',
              'Flock',
              'Internet Explorer',
              'Seamonkey',
              'Konqueror',
              'GoogleBot'
          );
          //list of operating systems
          $agentOS = array(
              'Windows 3.1',
              'Windows 95',
              'Windows 98',
              'Windows 2000',
              'Windows NT',
              'Windows XP',
              'Windows Vista',
              'Redhat Linux',
              'Ubuntu',
              'Fedora',
              'AmigaOS',
              'OS 10.5'
          );
          //randomly generate UserAgent
          $browser = $agentBrowser[rand(0, count($agentBrowser) - 1)];
          $browserVersion = rand(1, 8) . '.' . rand(0, 9);
          $os = $agentOS[rand(0, count($agentOS) - 1)];
          $osVersion = rand(1, 7) . '.' . rand(0, 9);

          $ua = $browser . '/' . $browserVersion . ' (' . $os . ' ' . $osVersion . '; en-US;)';
          $this->setUserAgent($ua);
          return $ua;
      }

      /**
       * Route requests through a SOCKS5 proxy.
       *
       * @param string|bool $proxy "host:port", or a falsy value to clear the proxy
       * @return bool Result of setting CURLOPT_PROXY
       */
      public function setSocks5Proxy($proxy = false) {
          return $this->_setProxy(CURLPROXY_SOCKS5, $proxy);
      }

      /**
       * Route requests through an HTTP proxy.
       *
       * @param string|bool $proxy "host:port", or a falsy value to clear the proxy
       * @return bool Result of setting CURLOPT_PROXY
       */
      public function setHttpProxy($proxy = false) {
          return $this->_setProxy(CURLPROXY_HTTP, $proxy);
      }

      /**
       * Shared implementation of the two proxy setters.
       *
       * @param int $type A CURLPROXY_* constant
       * @param string|bool $proxy Proxy address; any falsy value is passed on as false
       * @return bool Result of setting CURLOPT_PROXY
       */
      protected function _setProxy($type, $proxy) {
          $this->set(CURLOPT_PROXYTYPE, $type);
          return $this->set(CURLOPT_PROXY, $proxy ? $proxy : false);
      }

      /**
       * Route requests through tor, which is addressed as a SOCKS5 proxy.
       *
       * @param string|null $tor "host:port"; null uses $this->tor
       * @return bool Result of setting CURLOPT_PROXY
       */
      public function setTor($tor = null) {
          if ($tor === null) {
              $tor = $this->tor;
          }
          return $this->setSocks5Proxy($tor);
      }

      /**
       * Queue a request header.
       *
       * @param string $key Header name
       * @param string $header Header value
       * @param bool $persistent true to send it with every request from now on,
       *   false to send it with the next request only
       * @return void
       */
      public function setHeader($key, $header, $persistent = false) {
          if ($persistent) {
              $this->persistentHeader[$key] = $header;
          }
          // exec() builds the request from $header only and refills it from
          // $persistentHeader afterwards, so a persistent header has to be queued
          // here as well or the very next request would go out without it.
          $this->header[$key] = $header;
      }

      /**
       * Remove a queued request header.
       *
       * @param string $key Header name
       * @param bool $persistent true to stop sending a persistent header,
       *   false to drop it from the next request only
       * @return void
       */
      public function unsetHeader($key, $persistent = false) {
          if (!$persistent) {
              unset($this->header[$key]);
              return;
          }
          // Also drop the copy that is already queued for the next request, but
          // leave a one-off header alone that was given a different value.
          if (isset($this->persistentHeader[$key], $this->header[$key])
              && $this->header[$key] === $this->persistentHeader[$key]
          ) {
              unset($this->header[$key]);
          }
          unset($this->persistentHeader[$key]);
      }

      /**
       * Send the request that has been configured on the handle.
       *
       * The queued headers are sent, then the queue is reset to the persistent ones.
       *
       * @return array array(curl_exec() result, curl_getinfo() result)
       */
      public function exec() {
          $lines = array();
          foreach ($this->header as $name => $value) {
              $lines[] = $name . ': ' . $value;
          }
          $this->set(CURLOPT_HTTPHEADER, $lines);
          $this->header = $this->persistentHeader;

          $content = curl_exec($this->Ch);
          $info = curl_getinfo($this->Ch);
          return array($content, $info);
      }

      /**
       * Get/set referer
       *
       * Called without argument it applies the URL of the previous request,
       * unless an explicit referer is pending. Called with a URL it sets that
       * referer and keeps the next automatic call from overwriting it.
       *
       * @param string|null $url Explicit referer, or null for the automatic one
       * @return mixed Result of curl_setopt(), or false when nothing was set
       */
      public function referer($url = null) {
          if ($url !== null) {
              $this->lastUrl = null;
              return $this->set(CURLOPT_REFERER, $url);
          }
          // '' is the state before the first request: there is no referer to send yet.
          if ($this->lastUrl === null || $this->lastUrl === '') {
              return false;
          }
          return $this->set(CURLOPT_REFERER, $this->lastUrl);
      }

      /**
       * Build the final URL and the encoded request body.
       *
       * @param string $url Target URL, with or without a query string
       * @param array|string $getdata Data for the query string
       * @param array|string $data Data for the request body
       * @return array array(0 => URL including query string, 1 => encoded body)
       */
      protected function _prepareData($url, $getdata = array(), $data = array()) {
          $query = $this->_encodePart($getdata);
          $body = $this->_encodePart($data);

          if ($query !== '') {
              if (strpos($url, '?') === false) {
                  $url .= '?';
              } else {
                  // The URL already carries a query string: join with "&" unless
                  // it ends in a separator already.
                  $last = substr($url, -1);
                  if ($last !== '?' && $last !== '&') {
                      $url .= '&';
                  }
              }
          }
          return array($url . $query, $body);
      }

      /**
       * Encode one data part: arrays become "k=v&k2=v2", everything else is
       * url-encoded as one string.
       *
       * NOTE(review): url-encoding a non-array value as a whole also escapes
       * "=" and "&", so a ready-made query string or a raw body does not
       * survive. Kept as is because callers may rely on it - confirm the intent.
       *
       * @param array|string $part Data to encode
       * @return string Encoded data, '' when there is none
       */
      protected function _encodePart($part) {
          if (!is_array($part)) {
              return urlencode((string)$part);
          }
          $pairs = array();
          foreach ($part as $key => $value) {
              $pairs[] = urlencode((string)$key) . '=' . urlencode((string)$value);
          }
          return implode('&', $pairs);
      }

      /**
       * Send a POST request.
       *
       * @param string $url Target URL
       * @param array|string $data Data for the request body
       * @param array|string $getdata Data for the query string
       * @return array See exec()
       */
      public function post($url, $data = array(), $getdata = array()) {
          $this->referer();
          $this->set(CURLOPT_POST, true);
          $prepared = $this->_prepareData($url, $getdata, $data);
          $this->set(CURLOPT_URL, $prepared[0]);
          $this->set(CURLOPT_POSTFIELDS, $prepared[1]);
          // Remember the URL like get() does, so the next request gets this one as referer.
          $this->lastUrl = $url;
          return $this->exec();
      }

      /**
       * Send a GET request.
       *
       * @param string $url Target URL
       * @param array|string $data Data for the query string
       * @return array See exec()
       */
      public function get($url, $data = array()) {
          $this->referer();
          $this->set(CURLOPT_HTTPGET, true);
          $prepared = $this->_prepareData($url, $data);
          $this->set(CURLOPT_URL, $prepared[0]);
          // NOTE(review): this switches TLS peer verification off on every GET
          // and overrides whatever the caller configured through set(). Kept
          // for compatibility - confirm that this is really wanted.
          $this->set(CURLOPT_SSL_VERIFYPEER, false);
          $this->lastUrl = $url;
          return $this->exec();
      }

      /**
       * Release the handle and clean up a temporary cookie jar.
       */
      public function __destruct() {
          // Release the handle first: curl writes the cookie jar when the handle
          // goes away, so deleting the file before that would only re-create it.
          if ($this->Ch !== null) {
              curl_close($this->Ch);
              $this->Ch = null;
          }
          if (!is_array($this->cookie)) {
              return;
          }
          if (isset($this->cookie['handle'])) {
              fclose($this->cookie['handle']);
          }
          if (!empty($this->cookie['remove']) && !empty($this->cookie['file']) && is_file($this->cookie['file'])) {
              unlink($this->cookie['file']);
          }
      }
  }