Fix for Demo code to parse the Google SoC 2008 applications list available to mentors

  1. <?php
  2. /**
  3.  * Parse the Google SoC 2008 data available to mentors at
  4.  * http://code.google.com/soc/2008/drupal/open.html
  5.  * in order to include it in a Drupal app
  6.  *
  7.  * @author FG Marand http://drupal.org/user/27985
  8.  * @license GPL2
  9.  *
  10.  * Usage: log in to the SoC 2008 site as a mentor, save the pages to <your site>/files/soc.html for the list,
  11.  * and to <your site>/files/details.html for a details page
  12.  *
  13.  * and go to <your site>/testcode/list_apps
  14.  *   for the list
  15.  * <your site>/testcode/app_details
  16.  *   for the details of one application
  17.  *
  18.  * WARNING: this is just unsafe demo code.
  19.  */
  20.  
  /**
   * Declare the menu entries for the two demo pages.
   *
   * The signature follows the Drupal menu hook convention (presumably this is
   * invoked by Drupal as hook_menu; confirm against the Drupal version in use).
   *
   * @param bool $may_cache
   *   Entries are only declared when this is true; otherwise the result is
   *   an empty array.
   *
   * @return array
   *   A list of menu item definitions, each with 'title', 'path', 'access'
   *   and 'callback' keys.
   */
  function testcode_menu($may_cache)
    {
    // Nothing to declare outside of the cacheable pass.
    if (!$may_cache)
      {
      return array();
      }

    // path => (title, callback), in the order the items are declared.
    $pages = array
      (
      'testcode/list_apps'   => array('SoC 2008 - List', 'testcode_list_apps'),
      'testcode/app_details' => array('SoC 2008 - App details', 'testcode_app_details'),
      );

    $menu = array();
    foreach ($pages as $path => $page)
      {
      $menu[] = array
        (
        'title'    => $page[0],
        'path'     => $path,
        'access'   => user_access('administer nodes'),
        'callback' => $page[1],
        );
      }

    return $menu;
    }
  44.  
  /**
   * Parse the page saved from
   * http://code.google.com/soc/2008/drupal/open.html
   * containing the applications list, and render it as a themed table.
   *
   * The page is read from files/soc.html. Every row of the "applist" table
   * except the first becomes one output row; the project title links back to
   * the individual application page on Google.
   *
   * The saved page is untrusted input, so the text taken from it is escaped
   * before being handed to the table theme function.
   *
   * @return string
   *   The themed HTML table, or a translated error message when the saved
   *   page is missing, empty or cannot be parsed.
   */
  function testcode_list_apps()
    {
    $socBase = 'http://code.google.com/soc/2008/drupal/';
    $path    = 'files/soc.html';

    $unsafeFile = is_readable($path) ? file_get_contents($path) : false;
    if ($unsafeFile === false || trim($unsafeFile) === '')
      {
      return t('The saved applications list (files/soc.html) is missing or empty.');
      }

    // loadHTML() is an instance method: calling it statically is deprecated
    // and is a fatal error as of PHP 8. The @ only hides the libxml warnings
    // that saved real-world HTML triggers.
    $htmlDom = new DOMDocument();
    if (!@$htmlDom->loadHTML($unsafeFile))
      {
      return t('The saved applications list (files/soc.html) could not be parsed.');
      }

    // Note: an instanceof test is used because a SimpleXMLElement can be
    // "falsy" in a boolean context even when it is a valid object.
    $xml = simplexml_import_dom($htmlDom);
    if (!($xml instanceof SimpleXMLElement))
      {
      return t('The saved applications list (files/soc.html) could not be parsed.');
      }

    $appList = $xml->xpath('//table[@class="applist"]/tr');
    if (!is_array($appList))
      {
      $appList = array();
      }

    // The first row is skipped (presumably the header row of the list).
    array_shift($appList);

    $arApps = array();
    foreach ($appList as $row)
      {
      // Literal markup chosen by this code, not taken from the page.
      $class = trim((string) $row['class']) == 'listrequestapp' ? 'Request' : '&nbsp;';

      // The project title and its relative link live in a nested table.
      $link = $row->td[0]->table->tr[0]->td->a;
      $l    = l(trim((string) $link[0]), $socBase . $link['href']);

      // Casting a SimpleXML element to string yields decoded text, which
      // must be escaped before being output as HTML.
      $student = check_plain(trim((string) $row->td[1]));
      $score   = check_plain(trim((string) $row->td[3]));

      // asXML() serialises text with &, < and > entity-encoded, so the
      // tag-stripped result is left as-is to avoid double-encoding.
      $mentor  = trim(strip_tags($row->td[2]->asXML()));

      $arApps[] = array
        (
        'class'   => $class,
        'link'    => $l,
        'student' => $student,
        'mentor'  => $mentor,
        'score'   => $score,
        );
      }

    /**
     * Now we can format the table as wished
     */
    $header = array(t('Status'), t('Project'), t('Student'), t('Mentor'), t('Score'));

    return theme('table', $header, $arApps);
    }
  102.  
  /**
   * Display the details of an individual application, saved from
   * http://code.google.com/soc/2008/drupal/app.html?csaid=<some csaid>
   *
   * The page is read from files/details.html. The first "applist" table is
   * parsed as key/value metadata (th => td), the second one as a sequence of
   * area titles (th rows) each followed by its content (td rows).
   *
   * The function takes no argument: the saved file decides which application
   * is shown. A sample csaid (Code Summer App ID), for the security scanner
   * mentored by chx, is
   * PxIWBwNUVgECNRINH0VSXCxfPURUFg4GUXJfbkJQEQoBBiU%3D%0A
   *
   * The saved page is untrusted input, so the dump is escaped before output.
   *
   * @return string
   *   An escaped print_r() dump of the parsed data wrapped in <pre>, or a
   *   translated error message when the saved page is missing, empty or
   *   cannot be parsed.
   */
  function testcode_app_details()
    {
    $path = 'files/details.html';

    $unsafeFile = is_readable($path) ? file_get_contents($path) : false;
    if ($unsafeFile === false || trim($unsafeFile) === '')
      {
      return t('The saved application page (files/details.html) is missing or empty.');
      }

    // loadHTML() is an instance method: calling it statically is deprecated
    // and is a fatal error as of PHP 8. The @ only hides the libxml warnings
    // that saved real-world HTML triggers.
    $htmlDom = new DOMDocument();
    if (!@$htmlDom->loadHTML($unsafeFile))
      {
      return t('The saved application page (files/details.html) could not be parsed.');
      }

    // Note: an instanceof test is used because a SimpleXMLElement can be
    // "falsy" in a boolean context even when it is a valid object.
    $xml = simplexml_import_dom($htmlDom);
    if (!($xml instanceof SimpleXMLElement))
      {
      return t('The saved application page (files/details.html) could not be parsed.');
      }

    // The form with action "app.do" could provide access to the hidden
    // fields, but they are not needed here, so it is not queried.

    // A table missing from the page simply yields an empty section.
    $sxApplists    = $xml->xpath('//table[@class="applist"]');
    $sxMeta        = isset($sxApplists[0]) ? $sxApplists[0]->tr : array();
    $sxDescription = isset($sxApplists[1]) ? $sxApplists[1]->tr : array();

    /**
     * Parse the meta
     */
    $arMeta = array();
    foreach ($sxMeta as $sxMetaRow)
      {
      $key = trim((string) $sxMetaRow->th);
      $val = trim((string) $sxMetaRow->td);
      // Strict comparison: empty() would also drop a legitimate "0" key.
      if ($key !== '')
        {
        $arMeta[$key] = $val;
        }
      }

    /**
     * Parse the description
     */
    $arDescription = array();
    $currentArea = '';
    foreach ($sxDescription as $sxDescriptionRow)
      {
      if (isset($sxDescriptionRow->th)) // this is the title for a new area
        {
        $currentArea = trim((string) $sxDescriptionRow->th);
        }
      else // this HAS to be a td
        {
        assert(!empty($currentArea));
        // The string cast yields decoded text, which may itself look like
        // markup: it is escaped as a whole when the dump is built below.
        $arDescription[$currentArea] = trim((string) $sxDescriptionRow->td->div);
        }
      }

    $parsed = array
      (
      'meta' => $arMeta,
      'description' => $arDescription
      );

    // Escape the dump: it is made of text coming from the untrusted page.
    return "<pre>" . check_plain(print_r($parsed, true)) . "</pre>";
    }