Security scanner component for SimpleTest module: taking out a form_id attribute from a hidden field

  /**
   * Crawler entry point: processes queued links until 25 seconds have elapsed.
   *
   * On each pass a new {crawler} row is created, one unclaimed row of
   * {crawler_links} (crawler_id = 0) is claimed for it, and that page is fetched
   * with the stored session cookie. Every link on the page that stays under
   * $base_url and is not a logout link is queued in {crawler_links}; the value
   * of each form's hidden "form_id" input is stored in {crawler_forms} together
   * with the id of the visited link. The claimed link is finally set to
   * status 2.
   *
   * The loop stops early when no unclaimed link is left.
   *
   * @return
   *   An HTML paragraph with placeholder text.
   */
  function security_scanner_cron() {
    global $base_url;

    // Obtain the authenticated session cookie first if none is stored yet.
    if (variable_get('security_scanner_cookie', '') == '') {
      drupal_security_scanner_get_auth_cookie();
    }
    $session_cookie = variable_get('security_scanner_cookie', '');

    $deadline = time() + 25;
    while (time() < $deadline) {
      // Register this crawler pass and claim one unvisited link for it.
      db_query('INSERT INTO {crawler} VALUES (default)');
      $crawler_id = db_last_insert_id('crawler', 'id');
      db_query("UPDATE {crawler_links} SET crawler_id = %d, status = 1 WHERE crawler_id = 0 LIMIT 1", $crawler_id);
      $page_to_visit = db_fetch_array(db_query("SELECT id,path FROM {crawler_links} WHERE crawler_id = %d AND status = 1 LIMIT 1", $crawler_id));
      if (empty($page_to_visit)) {
        // Nothing left to claim: stop instead of fetching an empty path over
        // and over until the time budget runs out.
        break;
      }

      // Fetch and parse the page with the authenticated session cookie.
      $obj = new drupal_security_scanner_test();
      $obj->curl_options = array(
        CURLOPT_COOKIE => $session_cookie,
      );
      $obj->drupalGet($page_to_visit['path']);
      $obj->parse();

      // Skip extraction when the page could not be parsed into a document.
      if (is_object($obj->elements)) {
        // Queue every link that stays on this site.
        $links = $obj->elements->xpath('//a');
        if (is_array($links)) {
          foreach ($links as $link) {
            $absolute = getAbsoluteUrl((string) $link->attributes()->href);
            // Never follow the logout link: it would invalidate the session
            // cookie. Both the "?q=logout" and the clean-URL form are skipped.
            $parsed_url = parse_url($absolute);
            $is_logout = (isset($parsed_url['query']) && $parsed_url['query'] == 'q=logout')
              || (isset($parsed_url['path']) && preg_match('@/logout/?$@', $parsed_url['path']));
            if (!$is_logout && substr($absolute, 0, strlen($base_url)) == $base_url) {
              // IGNORE makes sure a link is stored only once ("path" is a
              // unique index).
              db_query("INSERT IGNORE INTO {crawler_links} VALUES ('','%s','','')", $absolute);
            }
          }
        }

        // Record the form_id of every form found on the page.
        $forms = $obj->elements->xpath('//form');
        if (is_array($forms)) {
          foreach ($forms as $form) {
            // The form id is the "value" attribute of the hidden input named
            // "form_id" inside the form.
            $form_id_fields = $form->xpath('.//input[@type="hidden"][@name="form_id"]');
            if (!empty($form_id_fields)) {
              $form_id = (string) $form_id_fields[0]['value'];
              if ($form_id != '') {
                // IGNORE again: "form_id" is the primary key, so each form is
                // stored only once.
                db_query("INSERT IGNORE INTO {crawler_forms} VALUES ('%s', %d)", $form_id, $page_to_visit['id']);
              }
            }
          }
        }
      }

      // Mark the claimed link as done and release this pass's cURL handle.
      db_query("UPDATE {crawler_links} SET status = 2 WHERE crawler_id = %d and status = 1 LIMIT 1", $crawler_id);
      $obj->curlClose();
    }

    // TODO: this has to be removed, because the cookie has to stay in the
    // database to enable multiple instances of the crawler.
    variable_del('security_scanner_cookie');
    return '<p>'. t('The quick brown fox jumps over the lazy dog.') .'</p>';
  }
  61.  
  /**
   * Obtains the admin session cookie and seeds the crawl queue.
   *
   * Requests the one-time password-reset URL of user 1 and submits it, with
   * drupal_security_scanner_curl_headers() installed as the cURL header
   * callback so that the session cookie sent by the site is saved in the
   * 'security_scanner_cookie' variable. Afterwards the absolute URL of 'admin'
   * is added to {crawler_links} as the first link to crawl.
   *
   * Known issue (from the original author): the crawler should be started from
   * a uid other than 1.
   *
   * @return
   *   Always TRUE.
   */
  function drupal_security_scanner_get_auth_cookie() {
    $initial_path = user_pass_reset_url(user_load(1));
    // Sleep to work around a bug inside a Drupal core function. Remove it once
    // this is fixed in core.
    sleep(1);

    // Do not follow redirects, and let the header callback capture the cookie.
    $obj = new drupal_security_scanner_test();
    $obj->curl_options = array(
      CURLOPT_HEADERFUNCTION => 'drupal_security_scanner_curl_headers',
      CURLOPT_FOLLOWLOCATION => 0,
    );
    // Load the password-reset page and press its submit button.
    $obj->drupalGet($initial_path);
    $obj->drupalPost($initial_path, '', TRUE);
    // Release the cURL handle now that the cookie has been captured.
    $obj->curlClose();

    // Seed the queue with the first URL. IGNORE keeps repeated calls from
    // failing on the unique "path" index when the link is already queued.
    db_query("INSERT IGNORE INTO {crawler_links} VALUES ('','%s','','')", url('admin', array('absolute' => TRUE)));
    return TRUE;
  }
  84.  
  /**
   * cURL header callback that captures the session cookie.
   *
   * For a "Set-Cookie" header line (matched case-insensitively) the first
   * "name=value" pair is saved in the 'security_scanner_cookie' variable.
   *
   * @param $ch
   *   The cURL handle passed in by cURL. When omitted, the function returns
   *   its static header list instead (which this function never fills).
   * @param $header
   *   One raw header line, including its line ending.
   *
   * @return
   *   The length of $header in bytes, as cURL requires from a header
   *   callback, or the static header array when $ch is not set.
   */
  function drupal_security_scanner_curl_headers($ch = NULL, $header = NULL) {
    static $headers = array();
    if (!isset($ch)) {
      return $headers;
    }
    // Header names are case-insensitive, so "set-cookie:" must match as well.
    if (strncasecmp($header, 'Set-Cookie:', 11) == 0) {
      // Keep only the leading "name=value" pair and drop attributes such as
      // path or expires. trim() removes the trailing line ending.
      $cookie = explode(';', trim(substr($header, 11)));
      if ($cookie[0] !== '') {
        variable_set('security_scanner_cookie', $cookie[0]);
      }
    }
    return strlen($header);
  }