DrupalBin
Submit Code
About
Recent Posts
beginnings of an Amazon/eBay-style "welcome back" module for Drupal
5 min 9 sec
ago
function for clearing a cookie
16 min 35 sec
ago
Drupal 5 menu
1 hour 42 min
ago
Views 2 get all field names
2 hours 25 min
ago
more
Tags
CCK
drupal
fapi
jquery
menu
module
Panels
php
simpletest
test
theme
views
more tags
User login
Log in using OpenID:
What is OpenID?
Username:
*
Password:
*
Create new account
Request new password
Log in using OpenID
Cancel OpenID login
Home
Fix for Crawler
View
Download
Fix
This fix will not be saved to the database until you submit.
Summary:
Tags:
Any tags you'd like to associate with your code, delimited by commas (example: Views, CCK, Module, etc).
Source code:
*
<?php
// $Id$

/**
 * Minimal cURL-based browser used by the security scanner crawler.
 *
 * Fetches pages over a cURL handle and exposes the last response as a
 * SimpleXML tree so that links can be extracted with XPath.
 */
class drupal_security_scanner_test {
  /**
   * The cURL handle; unset until curlConnect() creates it.
   */
  var $ch;

  /**
   * Custom cURL options; these take precedence over the defaults built in
   * curlConnect() and over the per-request options passed to curlExec().
   */
  var $curl_options = array();

  /**
   * SimpleXMLElement built from the last response, or FALSE when the response
   * has not been parsed (yet) or could not be parsed.
   */
  var $elements = FALSE;

  /**
   * Value handed to CURLOPT_COOKIEJAR by curlConnect().
   */
  var $cookie_file = NULL;

  /**
   * Raw result of the last curl_exec() call (FALSE when the request failed).
   */
  var $_content = FALSE;

  /**
   * Reset to FALSE by every curlExec() call.
   */
  var $plain_text = FALSE;

  /**
   * Initializes the cURL connection and gets a session cookie.
   *
   * This function will add authentication headers as specified in the
   * simpletest_httpauth_username and simpletest_httpauth_pass variables.
   * Also, see the description of $curl_options among the properties.
   *
   * @return string
   *   Content of the initial request when the handle was created by this
   *   call; nothing when the handle already existed.
   */
  function curlConnect() {
    global $base_url, $db_prefix;
    if (!isset($this->ch)) {
      $this->ch = curl_init();
      $curl_options = $this->curl_options + array(
        CURLOPT_COOKIEJAR => $this->cookie_file,
        CURLOPT_URL => $base_url,
        CURLOPT_FOLLOWLOCATION => TRUE,
        CURLOPT_RETURNTRANSFER => TRUE,
      );
      // When running under a simpletest database prefix, send the prefix as
      // the user agent.
      if (preg_match('/simpletest\d+/', $db_prefix)) {
        $curl_options[CURLOPT_USERAGENT] = $db_prefix;
      }
      if (!isset($curl_options[CURLOPT_USERPWD]) && ($auth = variable_get('simpletest_httpauth_username', ''))) {
        if ($pass = variable_get('simpletest_httpauth_pass', '')) {
          $auth .= ':' . $pass;
        }
        $curl_options[CURLOPT_USERPWD] = $auth;
      }
      // The handle is set at this point, so the curlConnect() call made by
      // curlExec() is a no-op and there is no recursion.
      return $this->curlExec($curl_options);
    }
  }

  /**
   * Performs a cURL exec with the specified options after calling curlConnect().
   *
   * @param array $curl_options
   *   Custom cURL options.
   * @return string
   *   Content returned from the exec (FALSE if curl_exec() failed).
   */
  function curlExec($curl_options) {
    $this->curlConnect();
    curl_setopt_array($this->ch, $this->curl_options + $curl_options);
    $this->_content = curl_exec($this->ch);
    // Invalidate everything derived from the previous response.
    $this->plain_text = FALSE;
    $this->elements = FALSE;
    return $this->_content;
  }

  /**
   * Closes the cURL handle and unsets it.
   */
  function curlClose() {
    if (isset($this->ch)) {
      curl_close($this->ch);
      unset($this->ch);
    }
  }

  /**
   * Parses the content returned from curlExec() using DOM and SimpleXML.
   *
   * @return SimpleXMLElement
   *   A SimpleXMLElement, or FALSE on failure.
   */
  function parse() {
    if (!$this->elements) {
      // Nothing to parse when the request failed or returned an empty body.
      if (!is_string($this->_content) || $this->_content === '') {
        return FALSE;
      }
      // loadHTML() is an instance method; calling it statically fails on
      // current PHP versions.
      $htmlDom = new DOMDocument();
      // DOM can load HTML soup. But, HTML soup can throw warnings, suppress
      // them.
      if (@$htmlDom->loadHTML($this->_content)) {
        // It's much easier to work with SimpleXML than DOM, luckily enough
        // we can just simply import our DOM tree.
        $this->elements = simplexml_import_dom($htmlDom);
      }
    }
    if (!$this->elements) {
      // Return a real boolean: the string "FALSE" is truthy and would defeat
      // any failure check made by the caller.
      return FALSE;
    }
    return $this->elements;
  }

  /**
   * Retrieves a Drupal path or an absolute path.
   *
   * @param string $path
   *   Drupal path or URL to load into the internal browser.
   * @param array $options
   *   Options to be forwarded to url().
   * @return string
   *   The retrieved HTML string, also available as $this->_content.
   */
  function drupalGet($path, $options = array()) {
    $options['absolute'] = TRUE;

    // We are re-using a cURL connection here. If that connection still has
    // certain options set, it might change the GET into a POST. Make sure we
    // clear out previous options.
    return $this->curlExec(array(
      CURLOPT_URL => url($path, $options),
      CURLOPT_POST => FALSE,
      CURLOPT_POSTFIELDS => array(),
    ));
  }
}

/**
 * Implementation of hook_menu().
 */
function security_scanner_menu() {
  $items = array();
  $items['admin/settings/security_scanner'] = array(
    'title' => 'Security Scanner',
    'page callback' => 'page_security_scanner',
    'access arguments' => array('access scanner'),
    'type' => MENU_NORMAL_ITEM,
  );
  return $items;
}

/**
 * Page callback: crawls links starting from the initial path.
 *
 * Seeds {crawler_links} with the initial path, then for up to five seconds
 * repeatedly claims one unvisited link, fetches it and queues every link
 * found on that page.
 *
 * @return string
 *   The HTML output of the page.
 */
function page_security_scanner() {
  $initial_path = 'http://localhost/soc2008/';
  db_query("INSERT INTO {crawler_links} VALUES ('','%s','','','')", $initial_path);

  $time = time() + 5;
  while (time() < $time) {
    // Initialize the crawler.
    db_query('INSERT INTO {crawler} VALUES (default)');
    $crawler_id = db_last_insert_id('crawler', 'id');

    // Mark one not-yet-claimed link as belonging to this crawler run.
    db_query("UPDATE {crawler_links} SET crawler_id = %d WHERE crawler_id = 0 LIMIT 1", $crawler_id);

    // Get the claimed link from the crawler_links table.
    $result = db_query("SELECT path FROM {crawler_links} WHERE crawler_id = %d AND status = 0 LIMIT 1", $crawler_id);
    $page_to_visit = db_fetch_array($result);
    if (!$page_to_visit) {
      // No link could be claimed: stop instead of spinning until the
      // deadline.
      break;
    }

    // Update the status field to flag that link as executed.
    db_query("UPDATE {crawler_links} SET status = 1 WHERE crawler_id = %d and status = 0 LIMIT 1", $crawler_id);

    // Create a new browser object, fetch the page and parse it.
    // db_fetch_array() returns an associative array, hence the 'path' key.
    $obj = new drupal_security_scanner_test();
    $obj->drupalGet($page_to_visit['path']);
    $elements = $obj->parse();
    // Release the cURL handle; the parsed tree does not depend on it.
    $obj->curlClose();
    if (!$elements) {
      // The page could not be fetched or parsed; move on to the next link.
      continue;
    }

    $links = $elements->xpath('//a');
    if (!$links) {
      continue;
    }
    foreach ($links as $link) {
      // Anchors without an href carry no link to follow.
      if (!isset($link['href'])) {
        continue;
      }
      // TODO: remove every link that leaves the domain.
      db_query("INSERT INTO {crawler_links} VALUES ('','%s','','','')", (string) $link['href']);
    }
  }

  return '<p>'. t('The quick brown fox jumps over the lazy dog.') .'</p>';
}

/**
 * Implementation of hook_perm().
 */
function security_scanner_perm() {
  return array('access scanner');
}

/**
 * Implementation of hook_help().
 */
function security_scanner_help($path, $arg) {
  switch ($path) {
    case 'security_scanner':
      // Here is some help text for a custom page.
      return t('This sentence contains all the letters in the English alphabet.');
  }
}
?>
Syntax highlighting mode:
ActionScript
ColdFusion
Diff
Drupal
Drupal 5
Drupal 6
HTML
Javascript
MySQL
PHP
Python
robots.txt
SQL
Text
Select the syntax highlighting mode to use.