<?php
// $Id$
 
 
 /**
  * Small cURL-backed HTTP client used by the security scanner crawler.
  *
  * Pages are fetched over a single, lazily created cURL handle. The body of
  * the most recent response is kept in $_content and can be turned into a
  * SimpleXMLElement tree with parse().
  */
  class drupal_security_scanner_test {

    /**
     * The cURL handle. NULL until curlConnect() creates it and again after
     * curlClose().
     */
    public $ch;

    /**
     * Extra cURL options keyed by CURLOPT_* constant. Because they are the
     * left operand of the array union in curlConnect() and curlExec(), they
     * win over both the built-in defaults and the per-request options.
     */
    public $curl_options = array();

    /**
     * Value handed to CURLOPT_COOKIEJAR. Nothing in this class assigns it, so
     * it stays NULL unless the caller sets it before the first request.
     */
    public $cookie_file = NULL;

    /**
     * Raw result of the last curl_exec() call (FALSE when the transfer failed).
     */
    public $_content = '';

    /**
     * Reset to FALSE by every curlExec() call; not read anywhere in this class.
     */
    public $plain_text = FALSE;

    /**
     * SimpleXMLElement built by parse() for the current $_content, or FALSE
     * when the content has not been parsed (or could not be parsed).
     */
    public $elements = FALSE;

    /**
     * Initializes the cURL connection and gets a session cookie.
     *
     * This function will add authentication headers as specified in the
     * simpletest_httpauth_username and simpletest_httpauth_pass variables,
     * unless CURLOPT_USERPWD is already present in $curl_options.
     *
     * The handle is created only once; later calls are no-ops.
     *
     * @return
     *   The content of the initial request to $base_url when the handle was
     *   created by this call, NULL when a handle already existed.
     */
    function curlConnect() {
      global $base_url, $db_prefix;
      if (isset($this->ch)) {
        return;
      }

      $this->ch = curl_init();
      $curl_options = $this->curl_options + array(
        CURLOPT_COOKIEJAR => $this->cookie_file,
        CURLOPT_URL => $base_url,
        CURLOPT_FOLLOWLOCATION => TRUE,
        CURLOPT_RETURNTRANSFER => TRUE,
      );
      // $db_prefix may be an array of per-table prefixes (or unset); only a
      // plain string can be matched and sent as the user agent.
      if (is_string($db_prefix) && preg_match('/simpletest\d+/', $db_prefix)) {
        $curl_options[CURLOPT_USERAGENT] = $db_prefix;
      }
      if (!isset($curl_options[CURLOPT_USERPWD]) && ($auth = variable_get('simpletest_httpauth_username', ''))) {
        if ($pass = variable_get('simpletest_httpauth_pass', '')) {
          $auth .= ':' . $pass;
        }
        $curl_options[CURLOPT_USERPWD] = $auth;
      }
      // curlExec() calls curlConnect() again, which returns immediately
      // because $this->ch is set by now.
      return $this->curlExec($curl_options);
    }

    /**
     * Performs a cURL exec with the specified options after calling
     * curlConnect().
     *
     * Any previously parsed tree is discarded so that parse() works on the new
     * content.
     *
     * @param array $curl_options Custom cURL options.
     * @return string Content returned from the exec (FALSE on cURL failure).
     */
    function curlExec($curl_options) {
      $this->curlConnect();
      curl_setopt_array($this->ch, $this->curl_options + $curl_options);
      $this->_content = curl_exec($this->ch);
      $this->plain_text = FALSE;
      $this->elements = FALSE;
      return $this->_content;
    }

    /**
     * Closes the cURL handle and forgets it.
     *
     * A later request transparently opens a new handle through curlConnect().
     */
    function curlClose() {
      if (isset($this->ch)) {
        curl_close($this->ch);
        // Reset instead of unset() so the declared property keeps existing.
        $this->ch = NULL;
      }
    }

    /**
     * Parses the content returned from curlExec() using DOM and SimpleXML.
     *
     * The result is cached in $this->elements until the next curlExec().
     *
     * @return SimpleXMLElement A SimpleXMLElement or FALSE on failure.
     */
    function parse() {
      // Nothing to parse when the transfer failed (FALSE) or returned an
      // empty body; DOMDocument::loadHTML() rejects empty input.
      if (!$this->elements && is_string($this->_content) && $this->_content !== '') {
        // DOM can load HTML soup, but HTML soup produces libxml warnings.
        // Collect them internally instead of emitting them, then restore the
        // caller's error-handling mode.
        $previous = libxml_use_internal_errors(TRUE);
        $htmlDom = new DOMDocument();
        $loaded = $htmlDom->loadHTML($this->_content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded) {
          // It's much easier to work with SimpleXML than DOM, luckily enough
          // we can just simply import our DOM tree.
          $imported = simplexml_import_dom($htmlDom);
          if ($imported) {
            $this->elements = $imported;
          }
        }
      }
      if (!$this->elements) {
        return FALSE;
      }
      return $this->elements;
    }

    /**
     * Retrieves a Drupal path or an absolute path.
     *
     * @param $path string Drupal path or url to load into internal browser
     * @param array $options Options to be forwarded to url().
     * @return The retrieved HTML string, also available as $this->_content.
     */
    function drupalGet($path, $options = array()) {
      $options['absolute'] = TRUE;

      // We are re-using a cURL connection here. If that connection still has
      // POST options set, the request would go out as a POST. CURLOPT_HTTPGET
      // resets the method to GET; setting CURLOPT_POSTFIELDS (even to an empty
      // array) would itself select POST.
      return $this->curlExec(array(
        CURLOPT_HTTPGET => TRUE,
        CURLOPT_URL => url($path, $options),
        CURLOPT_NOBODY => FALSE,
      ));
    }
  }
 
  /**
   * Implementation of hook_menu().
   *
   * Registers the scanner page at admin/settings/security_scanner, rendered by
   * page_security_scanner() and restricted to the 'access scanner' permission.
   *
   * @return
   *   An array of menu items keyed by path.
   */
  function security_scanner_menu() {
    return array(
      'admin/settings/security_scanner' => array(
        'title' => 'Security Scanner',
        'page callback' => 'page_security_scanner',
        'access arguments' => array('access scanner'),
        'type' => MENU_NORMAL_ITEM,
      ),
    );
  }

  /**
   * Page callback: runs the crawler for a short time slice.
   *
   * Seeds the {crawler_links} queue with the initial path, then for up to five
   * seconds repeatedly claims one unvisited link, fetches it, and queues every
   * anchor href found on the page.
   *
   * @return
   *   The HTML shown on the scanner page.
   */
  function page_security_scanner() {
    // NOTE(review): hard-coded development URL; confirm the intended start page.
    $initial_path = 'http://localhost/soc2008/';
    db_query("INSERT INTO {crawler_links} VALUES ('','%s','','','')", $initial_path);

    $time = time() + 5;
    while (time() < $time) {
      // Initialize the crawler.
      db_query('INSERT INTO {crawler} VALUES (default)');
      $crawler_id = db_last_insert_id('crawler', 'id');

      // Claim one unvisited link for this crawler run.
      db_query("UPDATE {crawler_links} SET crawler_id = %d WHERE crawler_id = 0 LIMIT 1", $crawler_id);

      // Get the claimed link from the crawler_links table.
      $result = db_query("SELECT path FROM {crawler_links} WHERE crawler_id = %d AND status = 0 LIMIT 1", $crawler_id);
      $page_to_visit = db_fetch_array($result);
      if (!$page_to_visit) {
        // The queue is empty; stop instead of spinning until the time is up.
        break;
      }

      // Update the status field to mark that link as executed.
      db_query("UPDATE {crawler_links} SET status = 1 WHERE crawler_id = %d and status = 0 LIMIT 1", $crawler_id);

      // db_fetch_array() returns an associative row, so the column is read by
      // name rather than by numeric index.
      $path = $page_to_visit['path'];
      if ($path === NULL || $path === '') {
        continue;
      }

      // Create a new browser object, fetch and parse the page, and release the
      // cURL handle as soon as the content is in memory.
      $obj = new drupal_security_scanner_test();
      $obj->drupalGet($path);
      $elements = $obj->parse();
      $obj->curlClose();
      if (!is_object($elements)) {
        // The page could not be fetched or parsed; move on to the next link.
        continue;
      }

      $links = $elements->xpath('//a');
      if (!$links) {
        continue;
      }
      foreach ($links as $link) {
        // TODO: remove every link that leaves the domain.
        $href = (string) $link['href'];
        if ($href === '') {
          // Anchors without an href carry nothing to crawl.
          continue;
        }
        db_query("INSERT INTO {crawler_links} VALUES ('','%s','','','')", $href);
      }
    }
    return '<p>'. t('The quick brown fox jumps over the lazy dog.') .'</p>';
  }

  /**
   * Implementation of hook_perm().
   *
   * @return
   *   The list of permission names defined by this module.
   */
  function security_scanner_perm() {
    $permissions = array();
    $permissions[] = 'access scanner';
    return $permissions;
  }
  
  /**
   * Implementation of hook_help().
   *
   * @param $path
   *   The path for which help is requested.
   * @param $arg
   *   Not used by this implementation.
   * @return
   *   The translated help text when $path is 'security_scanner'; nothing
   *   (NULL) for every other path.
   */
  function security_scanner_help($path, $arg) {
    // Loose comparison on purpose: it mirrors the matching rules of switch.
    if ($path == 'security_scanner') {
      // Here is some help text for a custom page.
      return t('This sentence contains all the letters in the English alphabet.');
    }
  }
       
   
?>
