/**
 * Implementation of the crawler function.
 *
 * Registers a new crawler run, claims the unassigned links for it, fetches one
 * of the claimed links and hands the parsed page to a callback.
 *
 * Flow (as performed on {crawler_links}):
 * 1- rows with crawler_id = 0 receive this run's crawler_id and
 *    status = $status + 1
 * 2- one of those rows is selected for processing
 * 3- the rows of this run at status = $status + 1 move to status = $status + 2
 *
 * @param $selection_query
 *   Array of SQL fragments: 'select' is appended after "SELECT l.path",
 *   'from' after "FROM {crawler_links} AS l" and 'where' closes the query.
 *   $selection_query['parameters'][0] is overwritten with the crawler id.
 * @param $status
 *   Status value the run starts from; it is incremented before each UPDATE.
 * @param $function_callback
 *   Name of a function invoked as $function_callback($obj, $selected_results).
 */
function crawler_framework($selection_query, $status, $function_callback) {
  // Initialize the crawler: every run gets its own row in {crawler}.
  db_query("INSERT INTO {crawler} VALUES (default)");
  // Read back the id generated by the INSERT above; every later query is
  // keyed on it.
  // NOTE(review): the serial column of {crawler} is assumed to be named 'id'
  // - confirm against the schema.
  $crawler_id = db_last_insert_id('crawler', 'id');

  // Adding the parameter $crawler_id to the query params.
  $selection_query['parameters'][0] = $crawler_id;

  // Claim the links: mark them as belonging to this run.
  // NOTE(review): there is no LIMIT here, so every row with crawler_id = 0 is
  // claimed although only one link is fetched below - confirm this is intended.
  $status++;
  db_query("UPDATE {crawler_links} SET crawler_id = %d, status = %d WHERE crawler_id = 0",
    $crawler_id,
    $status);

  // Get one link from the crawler_links table.
  $sql = "SELECT l.path" .
    $selection_query['select'] .
    " FROM {crawler_links} AS l " .
    $selection_query['from'] .
    " " .
    $selection_query['where'];
  $selected_results = db_fetch_array(db_query_range($sql,
    implode(",", $selection_query['parameters']),
    $status,
    0,
    1));

  // Update the status field to sign the claimed links as executed.
  $status++;
  db_query("UPDATE {crawler_links} SET status = %d WHERE crawler_id = %d and status = %d",
    $status,
    $crawler_id,
    $status - 1);

  // Nothing was selected (db_fetch_array() gave no row): there is no page to
  // fetch and nothing to pass to the callback.
  if (!$selected_results) {
    return;
  }

  // Create a new object and parse the page, sending the stored session cookie
  // along with the request.
  $obj = new drupal_security_scanner_test();
  $session_cookie = variable_get('security_scanner_cookie', '');
  $obj->curl_options = array(
    CURLOPT_COOKIE => $session_cookie,
  );
  $obj->drupalGet($selected_results['path']);
  $obj->parse();

  $function_callback($obj, $selected_results);

  $obj->curlClose();
}
/**
 * Crawler: page processing function.
 *
 * Stores the links found in a fetched page into {crawler_links} and the
 * form_id inputs found in it into {crawler_forms}.
 *
 * @param $obj
 *   Object holding the parsed page; $obj->elements must offer xpath().
 * @param $selected_results
 *   Row of the link that was fetched; its 'id' is saved next to every form.
 */
function security_scanner_page_processing($obj, $selected_results) {
  // Query strings that must never be queued:
  // a - the logout link, that makes me lose the cookie.
  // b - the security scanner settings page.
  // c - the xss_injector page. That will launch the crawler.
  $skipped_queries = array(
    'q=logout',
    'q=admin/settings/security_scanner',
    'q=admin/settings/xss_injector',
  );

  // xpath() can return FALSE instead of an array; treat that as "no links".
  $links = $obj->elements->xpath('//a');
  if (!$links) {
    $links = array();
  }
  foreach ($links as $link) {
    $url_to_save = (string)$link->attributes()->href;
    $absolute = getAbsoluteUrl($url_to_save);

    // Split the URL so the checks below have something to inspect.
    // NOTE(review): the 'file' component is derived here as the last segment
    // of the URL path - confirm this matches what the filter was meant to see.
    $parsed_url = parse_url($absolute);
    if ($parsed_url === FALSE) {
      // Seriously malformed URL: nothing sensible to queue.
      continue;
    }
    $query = isset($parsed_url['query']) ? $parsed_url['query'] : '';
    $file = isset($parsed_url['path']) ? basename($parsed_url['path']) : '';

    // d - cron.php is skipped as well, that would make a loop.
    if (!in_array($query, $skipped_queries, TRUE) && $file !== 'cron.php') {
      // Here we use IGNORE to insert only one time a link into the table. ("path" is a unique index)
      db_query("INSERT IGNORE INTO {crawler_links} VALUES ('','%s','','')",
        $absolute);
    }
  }

  // Get the forms inside the page
  $inputs = $obj->elements->xpath("//input[@name='form_id']");
  if (!$inputs) {
    $inputs = array();
  }
  foreach ($inputs as $input) {
    $form_id = (string)$input->attributes()->id;
    // Here we use again IGNORE to insert only one time a form_id into the table. ("form_id" is the primary key)
    db_query("INSERT IGNORE INTO {crawler_forms} VALUES ('%s','%d')",
      $form_id,
      $selected_results['id']);
  }
}
/**
 * Implementation of hook_cron().
 *
 * On each cron run either retrieves the authenticated session cookie (when
 * none is stored yet) or runs one crawler step with
 * security_scanner_page_processing() as the page callback.
 */
function security_scanner_cron() {
  // Check if the auth session cookie value is already into the db, otherwise
  // call the function that retrieves it (enable multithreading).
  // NOTE(review): the test below is reconstructed from the comment above and
  // from the 'security_scanner_cookie' variable the crawler reads - confirm.
  if (!variable_get('security_scanner_cookie', '')) {
    drupal_security_scanner_get_auth_cookie();
  }
  else {
    // Fragments equivalent to:
    // "SELECT id,path FROM {crawler_links} WHERE crawler_id = %d AND status = 1 LIMIT 1", $crawler_id
    $selection_query = array(
      'select' => ',l.id',
      'from' => '',
      'where' => 'WHERE crawler_id = %d and status = %d',
    );
    crawler_framework($selection_query, 0, 'security_scanner_page_processing');
  }
}