<?php
// CurlLib.php
  /**
   * Curl wrapper with goodies
   * - can switch the UA to test certain browser sensitive features
   * - can simulate/establish tor connection
   *
   * One instance owns one cURL handle for its whole lifetime. Options set on
   * the handle (proxy, user agent, ...) stay in effect for all later requests.
   *
   * @license http://opensource.org/licenses/mit-license.php MIT
   */
  class CurlLib {

      /**
       * Default settings.
       *
       * NOTE(review): nothing in this class reads this array. It is kept only
       * because it is public and external code may access it.
       *
       * @var array
       */
      public $settings = [
          'CURLOPT_SSL_VERIFYPEER' => false,
      ];

      /**
       * The cURL handle, created in the constructor.
       *
       * @var mixed
       */
      protected $Ch = null;

      /**
       * Cookie jar description, or null when cookie handling is disabled.
       * When set it is an array with the keys 'file' (path of the jar) and
       * 'remove' (whether the destructor deletes the file).
       *
       * @var array|null
       */
      public $cookie = null;

      /**
       * Default proxy address used by setTor().
       *
       * @var string
       */
      public $tor = '127.0.0.1:9050';

      /**
       * Headers (name => value) sent with the next request.
       *
       * @var array
       */
      public $header = [];

      /**
       * Headers (name => value) sent with every request.
       *
       * @var array
       */
      public $persistentHeader = [];

      /**
       * URL of the last request, used as referer for the next one.
       * Null means an explicit referer was set and must not be overwritten.
       *
       * @var string|null
       */
      protected $lastUrl = '';

      /**
       * Known user agent strings, grouped by browser and keyed by version.
       * Versions are listed oldest first.
       *
       * @var array
       */
      public $ua = [
          'Firefox' => [
              'Firefox/3.0.2 Linux' => 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.0.2) Gecko/2008091700 SUSE/3.0.2-5.2 Firefox/3.0.2'
          ],
          'IE' => [
              '6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
              '7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
              '8' => 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)'
          ],
          'Konqueror' => [
              'Konqueror/3.5' => 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko).'
          ],
          'Opera' => [
              '9.60' => 'Opera/9.60 (X11; Linux i686; U; de) Presto/2.1.1',
              '10' => 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.2.15 Version/10.00'
          ],
          'Safari' => [
              '1.0' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/85.7 (KHTML, like Gecko) Safari/85.7',
              '1.2' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.8',
              '3.3' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/522.15.5 (KHTML, like Gecko) Version/3.0.3 Safari/522.15.5'
          ],
          'Chrome' => [
              '8' => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML, like Gecko) Ubuntu/10.10 Chrome/8.1.0.0 Safari/540.0'
          ],
          'Bots' => [
              'Google' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
          ]
      ];

      /**
       * Set a single option on the cURL handle.
       *
       * @param int $key One of the CURLOPT_* constants
       * @param mixed $value Value for that option
       * @return bool Result of curl_setopt()
       */
      public function set($key, $value) {
          return curl_setopt($this->Ch, $key, $value);
      }

      /**
       * Create the handle and apply the default options.
       *
       * @param int $timeout Used for both the connect and the total timeout
       * @param bool|string $cookie True for a temporary cookie jar that is
       *   deleted on destruct, false for no cookie handling, anything else is
       *   used as the path of a cookie jar that is kept
       */
      public function __construct($timeout = 5, $cookie = true) {
          $this->cookie = null;
          if ($cookie === true) {
              $file = tempnam(sys_get_temp_dir(), 'curl_cookie');
              // Without a temp file there is nothing to store cookies in,
              // so cookie handling stays disabled.
              if ($file !== false) {
                  $this->cookie = ['file' => $file, 'remove' => true];
              }
          } elseif ($cookie !== false) {
              $this->cookie = ['file' => $cookie, 'remove' => false];
          }

          $this->Ch = curl_init();
          // "Disabled" is stored as null, so that is what must be tested here.
          if ($this->cookie !== null) {
              $this->set(CURLOPT_COOKIEJAR, $this->cookie['file']);
              $this->set(CURLOPT_COOKIEFILE, $this->cookie['file']);
          }
          $this->set(CURLOPT_FOLLOWLOCATION, true);
          $this->set(CURLOPT_ENCODING, "");
          $this->set(CURLOPT_RETURNTRANSFER, true);
          $this->set(CURLOPT_AUTOREFERER, true);
          $this->set(CURLOPT_CONNECTTIMEOUT, $timeout);
          $this->set(CURLOPT_TIMEOUT, $timeout);
          $this->set(CURLOPT_MAXREDIRS, 10);
          $this->setUserAgent();
      }

      /**
       * Set the user agent.
       *
       * @param string $ua A browser key of $ua (e.g. 'IE') or a literal user
       *   agent string
       * @param string|null $version Version key inside the browser group;
       *   when omitted or unknown the last listed version is used
       * @return bool Result of curl_setopt()
       */
      public function setUserAgent($ua = 'Firefox', $version = null) {
          return $this->set(CURLOPT_USERAGENT, $this->_resolveUserAgent($ua, $version));
      }

      /**
       * Translate a browser key and version into a user agent string.
       *
       * @param string $ua Browser key of $ua or a literal user agent string
       * @param string|null $version Version key inside the browser group
       * @return string The matching string from $ua, or the input unchanged
       *   when it is not a known browser key
       */
      protected function _resolveUserAgent($ua, $version = null) {
          if (empty($this->ua[$ua]) || !is_array($this->ua[$ua])) {
              return $ua;
          }
          $versions = $this->ua[$ua];
          if ($version !== null && isset($versions[$version])) {
              return $versions[$version];
          }
          // Groups are listed oldest first, so the last entry is the newest.
          return end($versions);
      }

      /**
       * Build a random user agent from fixed browser and OS lists and set it.
       *
       * @return string The generated user agent
       */
      //TODO: use Dummy.FakerLib instead
      public function randomizeUserAgent() {
          //list of browsers
          $agentBrowser = [
              'Firefox',
              'Safari',
              'Opera',
              'Flock',
              'Internet Explorer',
              'Seamonkey',
              'Konqueror',
              'GoogleBot'
          ];
          //list of operating systems
          $agentOS = [
              'Windows 3.1',
              'Windows 95',
              'Windows 98',
              'Windows 2000',
              'Windows NT',
              'Windows XP',
              'Windows Vista',
              'Redhat Linux',
              'Ubuntu',
              'Fedora',
              'AmigaOS',
              'OS 10.5'
          ];
          //randomly generate UserAgent
          $ua = $agentBrowser[rand(0, count($agentBrowser) - 1)] . '/' . rand(1, 8) . '.' . rand(0, 9) . ' (' . $agentOS[rand(0, count($agentOS) - 1)] . ' ' . rand(1, 7) . '.' . rand(0, 9) . '; en-US;)';
          $this->setUserAgent($ua);
          return $ua;
      }

      /**
       * Route requests through a SOCKS5 proxy.
       *
       * @param string|bool $proxy Proxy address, or a falsy value to clear it
       * @return bool Result of setting CURLOPT_PROXY
       */
      public function setSocks5Proxy($proxy = false) {
          return $this->_setProxy(CURLPROXY_SOCKS5, $proxy);
      }

      /**
       * Route requests through an HTTP proxy.
       *
       * @param string|bool $proxy Proxy address, or a falsy value to clear it
       * @return bool Result of setting CURLOPT_PROXY
       */
      public function setHttpProxy($proxy = false) {
          return $this->_setProxy(CURLPROXY_HTTP, $proxy);
      }

      /**
       * Set the proxy type and address in one place for both proxy setters.
       *
       * @param int $type One of the CURLPROXY_* constants
       * @param string|bool $proxy Proxy address, or a falsy value to clear it
       * @return bool Result of setting CURLOPT_PROXY
       */
      protected function _setProxy($type, $proxy) {
          $this->set(CURLOPT_PROXYTYPE, $type);
          // An empty proxy string is what a falsy value is converted to anyway.
          return $this->set(CURLOPT_PROXY, $proxy ? $proxy : '');
      }

      /**
       * Use a tor client as SOCKS5 proxy.
       *
       * @param string|null $tor Proxy address, defaults to $this->tor
       * @return bool Result of setting CURLOPT_PROXY
       */
      public function setTor($tor = null) {
          if ($tor === null) {
              $tor = $this->tor;
          }
          return $this->setSocks5Proxy($tor);
      }

      /**
       * Add a request header.
       *
       * @param string $key Header name
       * @param string $header Header value
       * @param bool $persistent True to send it with every request, false to
       *   send it with the next request only
       * @return void
       */
      public function setHeader($key, $header, $persistent = false) {
          if ($persistent) {
              $this->persistentHeader[$key] = $header;
          }
          // exec() builds the request from $header only, so a persistent header
          // has to be queued here as well or the next request would miss it.
          $this->header[$key] = $header;
      }

      /**
       * Remove a request header.
       *
       * @param string $key Header name
       * @param bool $persistent True to remove it for good, false to drop it
       *   from the next request only
       * @return void
       */
      public function unsetHeader($key, $persistent = false) {
          if ($persistent) {
              unset($this->persistentHeader[$key]);
          }
          // Also drop the copy queued for the next request.
          unset($this->header[$key]);
      }

      /**
       * Send the request that was prepared on the handle.
       *
       * @return array Two elements: the result of curl_exec() and the result
       *   of curl_getinfo()
       */
      public function exec() {
          $lines = [];
          foreach ($this->header as $name => $value) {
              $lines[] = $name . ': ' . $value;
          }
          $this->set(CURLOPT_HTTPHEADER, $lines);
          // One-off headers are used up now; start the next request with the
          // persistent ones.
          $this->header = $this->persistentHeader;
          $content = curl_exec($this->Ch);
          $info = curl_getinfo($this->Ch);
          return [$content, $info];
      }

      /**
       * Get/set referer
       *
       * With a URL, that URL is used as referer and is not replaced by the
       * URL of the previous request. Without one, the URL of the previous
       * request is used, if there is any.
       *
       * @param string|null $url Explicit referer
       * @return bool Result of curl_setopt(), or false when nothing was set
       */
      public function referer($url = null) {
          if ($url !== null) {
              // Null marks the explicit referer as "do not overwrite".
              $this->lastUrl = null;
              return $this->set(CURLOPT_REFERER, $url);
          }
          // lastUrl is '' before the first request; there is no referer to send then.
          if ($this->lastUrl !== null && $this->lastUrl !== '') {
              return $this->set(CURLOPT_REFERER, $this->lastUrl);
          }
          return false;
      }

      /**
       * Build the final URL and the encoded POST body.
       *
       * NOTE(review): a string is URL-encoded as a whole, so an already built
       * query string or a raw body passed as string will not survive this.
       *
       * @param string $url Target URL, with or without a query string
       * @param array|string $getdata Query parameters to append to the URL
       * @param array|string $data POST fields
       * @return array Two elements: the URL including the query, and the
       *   encoded POST body
       */
      protected function _prepareData($url, $getdata = [], $data = []) {
          $query = $this->_encodePart($getdata);
          $body = $this->_encodePart($data);

          if ($query !== '') {
              if (strpos($url, '?') === false) {
                  $url .= '?';
              } else {
                  // The URL already has a query: separate the new parameters
                  // from it unless it ends in a separator already.
                  $last = substr($url, -1);
                  if ($last !== '?' && $last !== '&') {
                      $url .= '&';
                  }
              }
          }

          return [$url . $query, $body];
      }

      /**
       * URL-encode one set of parameters.
       *
       * @param array|string $part Key/value pairs, or a single value
       * @return string "key=value" pairs joined by "&" for an array, the
       *   encoded value otherwise
       */
      protected function _encodePart($part) {
          if (!is_array($part)) {
              return urlencode((string)$part);
          }
          $pairs = [];
          foreach ($part as $key => $value) {
              $pairs[] = urlencode($key) . '=' . urlencode($value);
          }
          return implode('&', $pairs);
      }

      /**
       * Send a POST request.
       *
       * @param string $url Target URL
       * @param array|string $data POST fields
       * @param array|string $getdata Query parameters to append to the URL
       * @return array See exec()
       */
      public function post($url, $data = [], $getdata = []) {
          $this->referer();
          $this->set(CURLOPT_POST, true);
          $prepared = $this->_prepareData($url, $getdata, $data);
          $this->set(CURLOPT_URL, $prepared[0]);
          $this->set(CURLOPT_POSTFIELDS, $prepared[1]);
          // Same bookkeeping as get(), so the next request gets this URL as referer.
          $this->lastUrl = $url;
          return $this->exec();
      }

      /**
       * Send a GET request.
       *
       * @param string $url Target URL
       * @param array|string $data Query parameters to append to the URL
       * @return array See exec()
       */
      public function get($url, $data = []) {
          $this->referer();
          $this->set(CURLOPT_HTTPGET, true);
          $prepared = $this->_prepareData($url, $data);
          $this->set(CURLOPT_URL, $prepared[0]);
          // SECURITY NOTE(review): this turns off certificate verification, and
          // because the option stays on the handle it also affects every later
          // request. Kept because callers may rely on it.
          $this->set(CURLOPT_SSL_VERIFYPEER, false);
          $this->lastUrl = $url;
          return $this->exec();
      }

      /**
       * Release the handle and clean up the temporary cookie jar.
       */
      public function __destruct() {
          // The cookie jar is written when the handle is closed, so the handle
          // has to go first; deleting the file before that can bring it back.
          if ($this->Ch !== null) {
              if (is_resource($this->Ch)) {
                  curl_close($this->Ch);
              }
              // Handles that are objects are released by dropping the reference.
              $this->Ch = null;
          }

          if (!is_array($this->cookie)) {
              return;
          }
          if (isset($this->cookie['handle']) && is_resource($this->cookie['handle'])) {
              fclose($this->cookie['handle']);
          }
          if (!empty($this->cookie['remove']) && !empty($this->cookie['file']) && file_exists($this->cookie['file'])) {
              unlink($this->cookie['file']);
          }
      }

  }