PHP function to crawl a password-protected website with a username and password
In PHP, we can crawl a page using cURL. Make sure the cURL extension is enabled in PHP.
// Stop right away when the cURL extension is not available:
// curl_version() is only defined when the extension is loaded.
if (function_exists('curl_version') === false) {
    exit("cURL is not Enabled ,Please enable it");
}
Here are the steps to install and enable cURL on Ubuntu.
/**
 * Log in to a password-protected site and return the cURL handle used for the login.
 *
 * The login form is submitted as an URL-encoded POST with the fields
 * "UsernameAB" and "PasswordBC". The cookie engine is enabled on the handle
 * (via CURLOPT_COOKIEJAR), so cookies received during the login are replayed
 * on later requests made with the same handle. After the login request the
 * handle is switched back to GET so that follow-up requests do not re-send
 * the credentials.
 *
 * All parameters are optional; called without arguments the function uses the
 * same values that were previously hard-coded.
 *
 * @param string $username         Value sent in the "UsernameAB" form field.
 * @param string $password         Value sent in the "PasswordBC" form field.
 * @param string $url              Action URL of the login form.
 * @param string $cookie_file_path File the cookies are saved to when the handle is closed;
 *                                 it must be writable by the PHP process.
 *
 * @return resource|CurlHandle The cURL handle; it is returned even when the login
 *                             request failed (an E_USER_WARNING is raised in that case).
 */
function get_crawl($username = "test", $password = "password", $url = "http://abc.com/login", $cookie_file_path = "/cookie1.txt"){
    // http_build_query() URL-encodes the values, so characters such as "&",
    // "=", "+" or spaces in the credentials cannot corrupt the form body.
    $postinfo = http_build_query(
        [
            'UsernameAB' => $username,
            'PasswordBC' => $password,
        ],
        '',
        '&'
    );

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => false,
        CURLOPT_NOBODY         => false,
        // Return the response from curl_exec() instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => false,
        // Credentials are sent with this request, so the server certificate
        // must be verified; disabling these checks allows man-in-the-middle
        // interception on HTTPS login URLs.
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_COOKIEJAR      => $cookie_file_path,
        // set the cookie the site has for certain features, this is optional
        CURLOPT_COOKIE         => "cookiename=0",
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7",
        // CURLOPT_POST alone selects the POST method; no custom request
        // string is needed (and it would stick to the handle afterwards).
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $postinfo,
    ]);

    // REQUEST_URI is only defined when running under a web server; skip the
    // Referer header when the script is run from the command line.
    if (isset($_SERVER['REQUEST_URI'])) {
        curl_setopt($ch, CURLOPT_REFERER, $_SERVER['REQUEST_URI']);
    }

    if (curl_exec($ch) === false) {
        // Report the failure instead of silently ignoring it; the handle is
        // still returned so existing callers keep working.
        trigger_error('Login request failed: ' . curl_error($ch), E_USER_WARNING);
    }

    // Put the handle back into GET mode so later requests made with it do not
    // re-POST the login form body.
    curl_setopt($ch, CURLOPT_HTTPGET, true);

    return $ch;
}
// Perform the login once and keep the returned cURL handle for the page requests below.
$ch = get_crawl();
/**
 * Fetch a page with an existing cURL handle (for example the one returned by
 * get_crawl(), which already holds the login cookies).
 *
 * The handle is forced back to a plain GET before the request: a handle that
 * was last used to POST a login form would otherwise send that form body,
 * including the credentials, to every page it fetches.
 *
 * @param string              $PageURL URL of the page to fetch.
 * @param resource|CurlHandle $ch      cURL handle to run the request on.
 * @param bool                $close   Close the handle after the request (default, as before).
 *                                     Pass false to fetch several pages with the same handle.
 *
 * @return string|bool The result of curl_exec(): the response body when
 *                     CURLOPT_RETURNTRANSFER is set on the handle, false on failure.
 */
function crawlPage($PageURL,$ch,$close=true){
    // page with the content I want to grab
    curl_setopt($ch, CURLOPT_URL, $PageURL);

    // Drop any custom request method and POST state left on the handle.
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, null);
    curl_setopt($ch, CURLOPT_HTTPGET, true);

    // do stuff with the info with DomDocument() etc
    $html = curl_exec($ch);
    if ($html === false) {
        // Read the error before the handle is closed; the false return value
        // is kept so callers can still test for it.
        trigger_error('Could not fetch ' . $PageURL . ': ' . curl_error($ch), E_USER_WARNING);
    }

    if ($close) {
        curl_close($ch);
    }

    return $html;
}
// Usage: fetch a page that requires the login and keep the result.
// $html holds the return value of crawlPage() (false when the request failed),
// ready to be parsed with DOMDocument etc.
$html = crawlPage("http://abc.com/profile", $ch);
No comments