diff --git a/code/StaticSiteContentExtractor.php b/code/StaticSiteContentExtractor.php
index 325a481..d6bd55c 100644
--- a/code/StaticSiteContentExtractor.php
+++ b/code/StaticSiteContentExtractor.php
@@ -9,323 +9,344 @@
  * Given a set of fieldnames and CSS selectors corresponding to them, a map of content
  * fields will be returned.
  */
-class StaticSiteContentExtractor extends Object {
-
-	/**
-	 *
-	 * @var string
-	 */
-	protected $url = null;
-
-	/**
-	 *
-	 * @var string
-	 */
-	protected $content = null;
-
-	/**
-	 *
-	 * @var phpQueryObject
-	 */
-	protected $phpQuery = null;
-
-	/**
-	 * Set this by using the yml config system
-	 * 
-	 * Example:
-	 * <code>
-	 * StaticSiteContentExtractor:
+class StaticSiteContentExtractor extends Object
+{
+
+    /**
+     * The URL this extractor reads from, exactly as passed to the constructor.
+     * @var string
+     */
+    protected $url = null;
+
+    /**
+     * Response body fetched from $url; stays null until fetchContent() has run.
+     * @var string
+     */
+    protected $content = null;
+
+    /**
+     * phpQuery document built from $content; stays null until fetchContent() has run.
+     * @var phpQueryObject
+     */
+    protected $phpQuery = null;
+
+    /**
+     * Set this by using the yml config system
+     * 
+     * Example:
+     * <code>
+     * StaticSiteContentExtractor:
      *    log_file:  ../logs/import-log.txt
-	 * </code>
-	 *
-	 * @var string
-	 */
-	private static $log_file = null;
-
-	/**
-	 * Create a StaticSiteContentExtractor for a single URL/.
-	 * 
-	 * @param string $url The absolute URL to extract content from
-	 */
-	public function __construct($url) {
-		$this->url = $url;
-	}
-
-	/**
-	 * Extract content for map of field => css-selector pairs
-	 * 
-	 * @param  array $selectorMap A map of field name => css-selector
-	 * @return array              A map of field name => array('selector' => selector, 'content' => field content)
-	 */
-	public function extractMapAndSelectors($selectorMap) {
-		
-		if(!$this->phpQuery) {
-			$this->fetchContent();
-		}
-
-		$output = array();
-
-		foreach($selectorMap as $fieldName => $extractionRules) {
-			if(!is_array($extractionRules)) {
-				$extractionRules = array($extractionRules);
-			}
-
-			foreach($extractionRules as $extractionRule) {
-				if(!is_array($extractionRule)) {
-					$extractionRule = array('selector' => $extractionRule);
-				}
-				
-				$content = $this->extractField($extractionRule['selector'], $extractionRule['attribute'], $extractionRule['outerhtml']);
-				
-				if(!$content) {
-					continue;
-				}
-
-				$content = $this->excludeContent($extractionRule['excludeselectors'], $extractionRule['selector'], $content);
-				
-				if(!$content) {
-					continue;
-				}
-
-				if(!empty($extractionRule['plaintext'])) {
-					$content = Convert::html2raw($content);
-				}
-
-				// We found a match, select that one and ignore any other selectors
-				$output[$fieldName] = $extractionRule;
-				$output[$fieldName]['content'] = $content;
-				$this->log("Value set for $fieldName");
-				break;
-			}
-		}
-		return $output;
-	}
-
-	/**
-	 * Extract content for a single css selector
-	 * 
-	 * @param  string $cssSelector The selector for which to extract content.
-	 * @param  string $attribute If set, the value will be from this HTML attribute
-	 * @param  bool $outherHTML should we return the full HTML of the whole field
-	 * @return string The content for that selector
-	 */
-	public function extractField($cssSelector, $attribute = null, $outerHTML = false) {
-		if(!$this->phpQuery) {
-			$this->fetchContent();
-		}
-
-		$elements = $this->phpQuery[$cssSelector];
-
-		// just return the inner HTML for this node
-		if(!$outerHTML || !$attribute) {
-			return trim($elements->html());
-		}
-		
-		$result = '';
-		foreach($elements as $element) {
-			// Get the full html for this element
-			if($outerHTML) {
-				$result .= $this->getOuterHTML($element);
-			// Get the value of a attribute
-			} elseif($attribute && trim($element->getAttribute($attribute))) {
-				$result .= ($element->getAttribute($attribute)).PHP_EOL;
-			}
-		}
-		
-		return trim($result);
-	}
-
-	/**
-	 * Strip away content from $content that matches one or many css selectors.
-	 *
-	 * @param array $excludeSelectors
-	 * @param string $content
-	 * @return string
-	 */
-	protected function excludeContent($excludeSelectors, $parentSelector, $content) {
-		if(!$excludeSelectors) {
-			return $content;
-		}
-
-		foreach($excludeSelectors as $excludeSelector) {
-			if(!trim($excludeSelector)) {
-				continue;
-			}
-			$element = $this->phpQuery[$parentSelector.' '.$excludeSelector];
-			if($element) {
-				$remove = $element->htmlOuter();
-				$content = str_replace($remove, '', $content);
-				$this->log(' - Excluded content from "'.$parentSelector.' '.$excludeSelector.'"');
-			}
-		}
-		return ($content);
-	}
-
-	/**
-	 * Get the full HTML of the element and its childs
-	 *
-	 * @param DOMElement $element
-	 * @return string
-	 */
-	protected function getOuterHTML(DOMElement $element) {
-		$doc = new DOMDocument();
-		$doc->formatOutput = false;
-		$doc->preserveWhiteSpace = true;
-		$doc->substituteEntities = false;
-		$doc->appendChild($doc->importNode($element, true));
-		return $doc->saveHTML();
-	}
-
-	/**
-	 *
-	 * @return string
-	 */
-	public function getContent() {
-		return $this->content;
-	} 
-
-	/**
-	 * Fetch the content and initialise $this->content and $this->phpQuery
-	 * 
-	 * @return void
-	 */
-	protected function fetchContent() {
-		$this->log('Fetching ' . $this->url);
-
-		$response = $this->curlRequest($this->url, "GET");	
-		$this->content = $response->getBody();
-		$this->phpQuery = phpQuery::newDocument($this->content);
-
-		//// Make the URLs all absolute
-
-		// Useful parts of the URL
-		if(!preg_match('#^[a-z]+:#i', $this->url, $matches)) throw new Exception('Bad URL: ' . $this->url);
-		$protocol = $matches[0];
-
-		if(!preg_match('#^[a-z]+://[^/]+#i', $this->url, $matches)) throw new Exception('Bad URL: ' . $this->url);
-		$server = $matches[0];
-
-		$base = (substr($this->url,-1) == '/') ? $this->url : dirname($this->url) . '/';
-
-		$this->log('Rewriting links in content');
-
-		$rewriter = new StaticSiteLinkRewriter(function($url) use($protocol, $server, $base) {
-			// Absolute
-			if(preg_match('#^[a-z]+://[^/]+#i', $url) || substr($url,0,7) == 'mailto:') return $url;
-
-			// Protocol relative
-			if(preg_match('#^//[^/]#i', $url)) return $protocol . $url;
-
-			// Server relative
-			if($url[0] == "/") return $server . $url;
-
-			// Relative
-			$result = $base . $url;
-			while(strpos($result, '/../') !== false) {
-				$result = preg_replace('#[^/]+/+../+#i','/', $result);
-			}
-			while(strpos($result, '/./') !== false) {
-				$result = str_replace('/./','/', $result);
-			}
-			return $result;
-
-		});
-
-		#$rewriter->rewriteInPQ($this->phpQuery);
-		#echo($this->phpQuery->html());
-	}
-
-	/**
-	 * Use cURL to request a URL, and return a SS_HTTPResponse object.
-	 *
-	 * @param string $url
-	 * @param string $method
-	 * @param string $data
-	 * @param string $headers
-	 * @param array $curlOptions
-	 * @return \SS_HTTPResponse
-	 */
-	protected function curlRequest($url, $method, $data = null, $headers = null, $curlOptions = array()) {
-		$ch        = curl_init();
-		$timeout   = 5;
-		$ssInfo = new SapphireInfo;
-		$useragent = 'SilverStripe/' . $ssInfo->version();
-
-		curl_setopt($ch, CURLOPT_URL, $url);
-		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
-		curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
-		curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
-		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method);
-		curl_setopt($ch, CURLOPT_HEADER, 1);
-
-		if($headers) curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
-
-		// Add fields to POST and PUT requests
-		if($method == 'POST') {
-			curl_setopt($ch, CURLOPT_POST, 1);
-			curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
-		} elseif($method == 'PUT') {
-			$put = fopen("php://temp", 'r+');				
-			fwrite($put, $data);
-			fseek($put, 0); 
-
-			curl_setopt($ch, CURLOPT_PUT, 1);
-			curl_setopt($ch, CURLOPT_INFILE, $put);
-			curl_setopt($ch, CURLOPT_INFILESIZE, strlen($data)); 
-		}
-
-		// Follow redirects
-		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
-
-		// Set any custom options passed to the request() function
-		curl_setopt_array($ch, $curlOptions);
-
-		// Run request
-		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
-		$fullResponseBody = curl_exec($ch);
-		$curlError = curl_error($ch);
-
-		list($responseHeaders, $responseBody) = explode("\n\n", str_replace("\r","",$fullResponseBody), 2);
-		if(preg_match("#^HTTP/1.1 100#", $responseHeaders)) {
-			list($responseHeaders, $responseBody) = explode("\n\n", str_replace("\r","",$responseBody), 2);
-		}
-
-		$responseHeaders = explode("\n", trim($responseHeaders));
-		array_shift($responseHeaders);
-
-		$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-		curl_close($ch);
-
-		if($curlError !== '' || $statusCode == 0) {
-			$statusCode = 500;
-		}
-
-		$response = new SS_HTTPResponse($responseBody, $statusCode);		
-		foreach($responseHeaders as $headerLine) {
-			if(strpos($headerLine, ":") !== false) {
-				list($headerName, $headerVal) = explode(":", $headerLine, 2);
-				$response->addHeader(trim($headerName), trim($headerVal));
-			}
-		}
-
-		
-		return $response;
-	}
-
-	/**
-	 * Log a message if the logging has been setup according to docs
-	 *
-	 * @param string $message
-	 * @return void
-	 */
-	protected function log($message) {
-		$logFile = Config::inst()->get('StaticSiteContentExtractor','log_file');
-		if(!$logFile) {
-			return;
-		}
-
-		if(is_writable($logFile) || !file_exists($logFile) && is_writable(dirname($logFile))) {
-			error_log($message . "\n", 3, $logFile);
-		}
-	}
+     * </code>
+     *
+     * @var string
+     */
+    private static $log_file = null;
+
+    /**
+     * Create a StaticSiteContentExtractor for a single URL.
+     * Only the URL is stored here; the page itself is fetched lazily by the first extraction call.
+     * @param string $url The absolute URL to extract content from
+     */
+    public function __construct($url)
+    {
+        $this->url = $url;
+    }
+
+    /**
+     * Extract content for a map of field => extraction rule(s); per field, the first rule yielding content wins.
+     * Rule: a selector string, or array('selector' => ..) with optional attribute/outerhtml/excludeselectors/plaintext.
+     * @param  array $selectorMap A map of field name => selector string, or field name => list of rules
+     * @return array              A map of field name => matching rule array plus 'content'; unmatched fields are omitted
+     */
+    public function extractMapAndSelectors($selectorMap)
+    {
+        if (!$this->phpQuery) {
+            $this->fetchContent();
+        }
+
+        $output = array();
+
+        foreach ($selectorMap as $fieldName => $extractionRules) {
+            if (!is_array($extractionRules)) {
+                $extractionRules = array($extractionRules);
+            }
+
+            foreach ($extractionRules as $extractionRule) {
+                if (!is_array($extractionRule)) {
+                    $extractionRule = array('selector' => $extractionRule);
+                }
+                // Optional keys are missing from plain-selector rules; default them so no undefined index is read
+                $rule = $extractionRule + array('attribute' => null, 'outerhtml' => false, 'excludeselectors' => null);
+                $content = $this->extractField($rule['selector'], $rule['attribute'], $rule['outerhtml']);
+                if (!$content) {
+                    continue;
+                }
+
+                $content = $this->excludeContent($rule['excludeselectors'], $rule['selector'], $content);
+
+                if (!$content) {
+                    continue;
+                }
+
+                if (!empty($extractionRule['plaintext'])) {
+                    $content = Convert::html2raw($content);
+                }
+
+                // We found a match, select that one and ignore any other selectors
+                $output[$fieldName] = $extractionRule;
+                $output[$fieldName]['content'] = $content;
+                $this->log("Value set for $fieldName");
+                break;
+            }
+        }
+        return $output;
+    }
+
+    /**
+     * Extract content for a single css selector: inner HTML by default, outer HTML or attribute values on request.
+     * If both $outerHTML and $attribute are given, $outerHTML takes precedence.
+     * @param  string $cssSelector The selector for which to extract content.
+     * @param  string $attribute If set, the value will be from this HTML attribute (one line per element, blanks skipped)
+     * @param  bool $outerHTML should we return the full HTML of the whole field
+     * @return string The content for that selector
+     */
+    public function extractField($cssSelector, $attribute = null, $outerHTML = false)
+    {
+        if (!$this->phpQuery) {
+            $this->fetchContent();
+        }
+
+        $elements = $this->phpQuery[$cssSelector];
+
+        // Fall back to the inner HTML only when neither option is requested, so each option also works on its own
+        if (!$outerHTML && !$attribute) {
+            return trim($elements->html());
+        }
+
+        $result = '';
+        foreach ($elements as $element) {
+            // Get the full html for this element
+            if ($outerHTML) {
+                $result .= $this->getOuterHTML($element);
+            // Get the value of a attribute
+            } elseif ($attribute && trim($element->getAttribute($attribute))) {
+                $result .= ($element->getAttribute($attribute)).PHP_EOL;
+            }
+        }
+
+        return trim($result);
+    }
+
+    /**
+     * Strip away content from $content that matches one or many css selectors (textual removal of their outer HTML).
+     * @param array $excludeSelectors Selectors to strip; blank entries are skipped, an empty value returns $content as is
+     * @param string $parentSelector Selector prefixed to each exclude selector when querying the document
+     * @param string $content HTML to strip the matches from
+     * @return string
+     */
+    protected function excludeContent($excludeSelectors, $parentSelector, $content)
+    {
+        if (!$excludeSelectors) {
+            return $content;
+        }
+        // NOTE(review): removal relies on htmlOuter() output appearing verbatim in $content — confirm for multiple matches
+        foreach ($excludeSelectors as $excludeSelector) {
+            if (!trim($excludeSelector)) {
+                continue;
+            }
+            $element = $this->phpQuery[$parentSelector.' '.$excludeSelector];
+            if ($element) {
+                $remove = $element->htmlOuter();
+                $content = str_replace($remove, '', $content);
+                $this->log(' - Excluded content from "'.$parentSelector.' '.$excludeSelector.'"');
+            }
+        }
+        return ($content);
+    }
+
+    /**
+     * Get the full HTML of the element and its children
+     * (the node is deep-imported into a fresh DOMDocument, which is then serialised with saveHTML()).
+     * @param DOMElement $element
+     * @return string
+     */
+    protected function getOuterHTML(DOMElement $element)
+    {
+        $doc = new DOMDocument();
+        $doc->formatOutput = false;
+        $doc->preserveWhiteSpace = true;
+        $doc->substituteEntities = false;
+        $doc->appendChild($doc->importNode($element, true));
+        return $doc->saveHTML();
+    }
+
+    /**
+     * Get the fetched response body; null as long as fetchContent() has not been triggered by an extraction call.
+     * @return string
+     */
+    public function getContent()
+    {
+        return $this->content;
+    }
+
+    /**
+     * Fetch $this->url and initialise $this->content and $this->phpQuery, then build an absolute-URL link rewriter.
+     * @throws Exception If the URL lacks a scheme or a "scheme://host" prefix (checked only after the request is made)
+     * @return void
+     */
+    protected function fetchContent()
+    {
+        $this->log('Fetching ' . $this->url);
+
+        $response = $this->curlRequest($this->url, "GET");
+        $this->content = $response->getBody();
+        $this->phpQuery = phpQuery::newDocument($this->content);
+
+        //// Make the URLs all absolute
+
+        // Useful parts of the URL: $protocol keeps its trailing colon (e.g. "http:"), $server is "scheme://host"
+        if (!preg_match('#^[a-z]+:#i', $this->url, $matches)) {
+            throw new Exception('Bad URL: ' . $this->url);
+        }
+        $protocol = $matches[0];
+
+        if (!preg_match('#^[a-z]+://[^/]+#i', $this->url, $matches)) {
+            throw new Exception('Bad URL: ' . $this->url);
+        }
+        $server = $matches[0];
+
+        $base = (substr($this->url, -1) == '/') ? $this->url : dirname($this->url) . '/';
+
+        $this->log('Rewriting links in content');
+
+        $rewriter = new StaticSiteLinkRewriter(function ($url) use ($protocol, $server, $base) {
+            // Absolute
+            if (preg_match('#^[a-z]+://[^/]+#i', $url) || substr($url, 0, 7) == 'mailto:') {
+                return $url;
+            }
+
+            // Protocol relative
+            if (preg_match('#^//[^/]#i', $url)) {
+                return $protocol . $url;
+            }
+
+            // Server relative
+            if ($url[0] == "/") {
+                return $server . $url;
+            }
+
+            // Relative: append to $base, then collapse "/../" and "/./" segments
+            $result = $base . $url;
+            while (strpos($result, '/../') !== false) {
+                $result = preg_replace('#[^/]+/+../+#i', '/', $result);
+            }
+            while (strpos($result, '/./') !== false) {
+                $result = str_replace('/./', '/', $result);
+            }
+            return $result;
+
+        });
+        // NOTE(review): $rewriter is built but never applied, because the call below is commented out
+        #$rewriter->rewriteInPQ($this->phpQuery);
+        #echo($this->phpQuery->html());
+    }
+
+    /**
+     * Use cURL to request a URL, and return a SS_HTTPResponse object.
+     * Redirects are followed; a cURL failure or a missing status code is reported as status 500.
+     * @param string $url
+     * @param string $method HTTP verb; 'POST' and 'PUT' additionally send $data as the request body
+     * @param string $data Request body for POST/PUT
+     * @param array $headers Request header lines, handed to CURLOPT_HTTPHEADER
+     * @param array $curlOptions Extra cURL options (CURLOPT_SSL_VERIFYPEER is overridden after they are applied)
+     * @return \SS_HTTPResponse Body with carriage returns stripped, plus the headers of the final response
+     */
+    protected function curlRequest($url, $method, $data = null, $headers = null, $curlOptions = array())
+    {
+        $ch        = curl_init();
+        $timeout   = 5;
+        $ssInfo = new SapphireInfo;
+        $useragent = 'SilverStripe/' . $ssInfo->version();
+
+        curl_setopt($ch, CURLOPT_URL, $url);
+        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
+        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
+        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method);
+        curl_setopt($ch, CURLOPT_HEADER, 1);
+
+        if ($headers) {
+            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
+        }
+
+        // Add fields to POST and PUT requests
+        if ($method == 'POST') {
+            curl_setopt($ch, CURLOPT_POST, 1);
+            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
+        } elseif ($method == 'PUT') {
+            $put = fopen("php://temp", 'r+');
+            fwrite($put, $data);
+            fseek($put, 0);
+
+            curl_setopt($ch, CURLOPT_PUT, 1);
+            curl_setopt($ch, CURLOPT_INFILE, $put);
+            curl_setopt($ch, CURLOPT_INFILESIZE, strlen($data));
+        }
+
+        // Follow redirects
+        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+
+        // Set any custom options passed to the request() function
+        curl_setopt_array($ch, $curlOptions);
+
+        // Run request. NOTE(review): peer verification is switched off, so HTTPS sources are not authenticated
+        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
+        $fullResponseBody = curl_exec($ch);
+        $curlError = curl_error($ch);
+
+        // Split on cURL's header size: redirects and "100 Continue" prepend extra header blocks, the last one is final
+        $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
+        $responseBody = str_replace("\r", "", (string) substr($fullResponseBody, $headerSize));
+        $headerBlocks = explode("\n\n", trim(str_replace("\r", "", (string) substr($fullResponseBody, 0, $headerSize))));
+
+        $responseHeaders = explode("\n", trim(end($headerBlocks)));
+        array_shift($responseHeaders);
+
+        $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+        curl_close($ch);
+
+        if ($curlError !== '' || $statusCode == 0) {
+            $statusCode = 500;
+        }
+
+        $response = new SS_HTTPResponse($responseBody, $statusCode);
+        foreach ($responseHeaders as $headerLine) {
+            if (strpos($headerLine, ":") !== false) {
+                list($headerName, $headerVal) = explode(":", $headerLine, 2);
+                $response->addHeader(trim($headerName), trim($headerVal));
+            }
+        }
+
+
+        return $response;
+    }
+
+    /**
+     * Log a message if the logging has been setup according to docs
+     * (appended to the configured log_file; nothing happens when it is unset or not writable).
+     * @param string $message
+     * @return void
+     */
+    protected function log($message)
+    {
+        $logFile = Config::inst()->get('StaticSiteContentExtractor', 'log_file');
+        if (!$logFile) {
+            return;
+        }
+        // Existing writable file, or a missing file inside a writable directory (&& binds tighter than ||)
+        if (is_writable($logFile) || !file_exists($logFile) && is_writable(dirname($logFile))) {
+            error_log($message . "\n", 3, $logFile);
+        }
+    }
 }
diff --git a/code/StaticSiteContentItem.php b/code/StaticSiteContentItem.php
index 56ffc0b..8bca9fa 100644
--- a/code/StaticSiteContentItem.php
+++ b/code/StaticSiteContentItem.php
@@ -1,69 +1,81 @@
 <?php
 
-class StaticSiteContentItem extends ExternalContentItem {
-	public function init() {
-		$url = $this->externalId;
-
-		$processedURL = $this->source->urlList()->processedURL($url); 
-		$parentURL = $this->source->urlList()->parentProcessedURL($processedURL);
-
-		$subURL = substr($processedURL, strlen($parentURL));
-		if($subURL != "/") $subURL = preg_replace('#(^/)|(/$)#','',$subURL);
-		
-		$this->Name = $subURL;
-		$this->Title = $this->Name;
-		$this->AbsoluteURL = preg_replace('#/$#','', $this->source->BaseUrl) . $this->externalId;
-		$this->ProcessedURL = $processedURL;
-	} 	
-
-	public function stageChildren($showAll = false) {
-		if(!$this->source->urlList()->hasCrawled()) return new ArrayList;
-
-		$childrenURLs = $this->source->urlList()->getChildren($this->externalId);
-
-		$children = new ArrayList;
-		foreach($childrenURLs as $child) {
-			$children->push($this->source->getObject($child));
-		}
-
-		return $children;
-	}
-
-	public function numChildren() {
-		if(!$this->source->urlList()->hasCrawled()) return 0;
-
-		return sizeof($this->source->urlList()->getChildren($this->externalId));
-	}
-
-	public function getType() {
-		return "sitetree";
-	}
-
-	public function getCMSFields() {
-		$fields = parent::getCMSFields();
-
-		// Add the preview fields here, including rules used
-		$t = new StaticSitePageTransformer;
-
-		$urlField = new ReadonlyField("PreviewSourceURL", "Imported from",
-			"<a href=\"$this->AbsoluteURL\">" . Convert::raw2xml($this->AbsoluteURL) . "</a>");
-		$urlField->dontEscape = true;
-
-		$fields->addFieldToTab("Root.Preview", $urlField);
-
-		$content = $t->getContentFieldsAndSelectors($this);
-		if(count($content) === 0) {
-			return $fields;
-		}
-		foreach($content as $k => $v) {
-			$readonlyField = new ReadonlyField("Preview$k", "$k<br>\n<em>" . $v['selector'] . "</em>", $v['content']);
-			$readonlyField->addExtraClass('readonly-click-toggle');
-			$fields->addFieldToTab("Root.Preview", $readonlyField);
-		}
-
-		Requirements::javascript('staticsiteconnector/js/StaticSiteContentItem.js');
-		Requirements::css('staticsiteconnector/css/StaticSiteContentItem.css');
-
-		return $fields;
-	}
-}	
\ No newline at end of file
+/** External content item for one crawled URL; the external ID is the URL path appended to the source's BaseUrl. */
+class StaticSiteContentItem extends ExternalContentItem
+{
+    /** Derive Name/Title (URL part below the parent, outer slashes trimmed), AbsoluteURL and ProcessedURL. */
+    public function init()
+    {
+        $processedURL = $this->source->urlList()->processedURL($this->externalId);
+        $parentURL = $this->source->urlList()->parentProcessedURL($processedURL);
+        $subURL = substr($processedURL, strlen($parentURL));
+        if ($subURL != "/") {
+            $subURL = preg_replace('#(^/)|(/$)#', '', $subURL);
+        }
+
+        $this->Name = $subURL;
+        $this->Title = $this->Name;
+        $this->AbsoluteURL = preg_replace('#/$#', '', $this->source->BaseUrl) . $this->externalId;
+        $this->ProcessedURL = $processedURL;
+    }
+
+    /** Child URLs of this item as content items; an empty list until the source has crawled ($showAll is ignored). */
+    public function stageChildren($showAll = false)
+    {
+        if (!$this->source->urlList()->hasCrawled()) {
+            return new ArrayList;
+        }
+
+        $childrenURLs = $this->source->urlList()->getChildren($this->externalId);
+        $children = new ArrayList;
+        foreach ($childrenURLs as $child) {
+            $children->push($this->source->getObject($child));
+        }
+
+        return $children;
+    }
+
+    /** Number of child URLs known for this item; 0 until the source has crawled. */
+    public function numChildren()
+    {
+        if (!$this->source->urlList()->hasCrawled()) {
+            return 0;
+        }
+        return count($this->source->urlList()->getChildren($this->externalId));
+    }
+
+    /** Type identifier of this item: always "sitetree". */
+    public function getType()
+    {
+        return "sitetree";
+    }
+
+    /** Parent CMS fields plus a "Preview" tab: a link to the source URL and one read-only field per extracted field. */
+    public function getCMSFields()
+    {
+        $fields = parent::getCMSFields();
+        // Add the preview fields here, including rules used
+        $t = new StaticSitePageTransformer;
+
+        // dontEscape is switched on below, so the URL is escaped by hand for both the attribute and the link text
+        $urlField = new ReadonlyField("PreviewSourceURL", "Imported from",
+            "<a href=\"" . Convert::raw2att($this->AbsoluteURL) . "\">" . Convert::raw2xml($this->AbsoluteURL) . "</a>");
+        $urlField->dontEscape = true;
+        $fields->addFieldToTab("Root.Preview", $urlField);
+
+        $content = $t->getContentFieldsAndSelectors($this);
+        if (count($content) === 0) {
+            return $fields;
+        }
+        foreach ($content as $k => $v) {
+            // The title is assembled as HTML here, so the selector text is escaped before being embedded
+            $readonlyField = new ReadonlyField("Preview$k", "$k<br>\n<em>" . Convert::raw2xml($v['selector']) . "</em>", $v['content']);
+            $readonlyField->addExtraClass('readonly-click-toggle');
+            $fields->addFieldToTab("Root.Preview", $readonlyField);
+        }
+
+        Requirements::javascript('staticsiteconnector/js/StaticSiteContentItem.js');
+        Requirements::css('staticsiteconnector/css/StaticSiteContentItem.css');
+        return $fields;
+    }
+}
diff --git a/code/StaticSiteContentSource.php b/code/StaticSiteContentSource.php
index 412136d..8caf79f 100644
--- a/code/StaticSiteContentSource.php
+++ b/code/StaticSiteContentSource.php
@@ -1,377 +1,401 @@
 <?php
 
-class StaticSiteContentSource extends ExternalContentSource {
-
-	public static $db = array(
-		'BaseUrl' => 'Varchar(255)',
-		'UrlProcessor' => 'Varchar(255)',
-		'ExtraCrawlUrls' => 'Text',
-		'UrlExcludePatterns' => 'Text',
-	);
-
-	public static $has_many = array(
-		"Schemas" => "StaticSiteContentSource_ImportSchema",
-		"Pages" => "SiteTree",
-	);
-
-
-	public function getCMSFields() {
-		$fields = parent::getCMSFields();
-
-		$importRules = $fields->dataFieldByName('Schemas');
-		$importRules->getConfig()->removeComponentsByType('GridFieldAddExistingAutocompleter');
-		$importRules->getConfig()->removeComponentsByType('GridFieldAddNewButton');
-		$addNewButton = new GridFieldAddNewButton('after');
-		$addNewButton->setButtonName("Add schema");
-		$importRules->getConfig()->addComponent($addNewButton);
-
-		$fields->removeFieldFromTab("Root", "Schemas");
-		$fields->removeFieldFromTab("Root", "Pages");
-		$fields->addFieldToTab("Root.Main", new LiteralField("", "<p>Each import rule will import content for a field"
-			. " by getting the results of a CSS selector.  If more than one rule exists for a field, then they will be"
-			. " processed in the order they appear.  The first rule that returns content will be the one used.</p>"));
-		$fields->addFieldToTab("Root.Main", $importRules);
-
-		$processingOptions = array("" => "No pre-processing");
-		foreach(ClassInfo::implementorsOf('StaticSiteUrlProcessor') as $processor) {
-			$processorObj = new $processor;
-			$processingOptions[$processor] = "<strong>" . Convert::raw2xml($processorObj->getName()) 
-				. "</strong><br>" . Convert::raw2xml($processorObj->getDescription());
-		}
-
-		$fields->addFieldToTab("Root.Main", new OptionsetField("UrlProcessor", "URL processing", $processingOptions));
-
-
-		switch($this->urlList()->getSpiderStatus()) {
-			case "Not started":
-				$crawlButtonText = _t('StaticSiteContentSource.CRAWL_SITE', 'Crawl site');
-				break;
-
-			case "Partial":
-				$crawlButtonText = _t('StaticSiteContentSource.RESUME_CRAWLING', 'Resume crawling');
-				break;
-
-			case "Complete":
-				$crawlButtonText = _t('StaticSiteContentSource.RECRAWL_SITE', 'Re-crawl site');
-				break;
-
-			default:
-				throw new LogicException("Invalid getSpiderStatus() value '".$this->urlList()->getSpiderStatus().";");
-		}
-		
-
-		$crawlButton = FormAction::create('crawlsite', $crawlButtonText)
-			->setAttribute('data-icon', 'arrow-circle-double')
-			->setUseButtonTag(true);
-		$fields->addFieldsToTab('Root.Crawl', array(
-			new ReadonlyField("CrawlStatus", "Crawling Status", $this->urlList()->getSpiderStatus()),
-			new ReadonlyField("NumURLs", "Number of URLs", $this->urlList()->getNumURLs()),
-
-			new LiteralField('CrawlActions', 
-			"<p>Before importing this content, all URLs on the site must be crawled (like a search engine does). Click"
-			. " the button below to do so:</p>"
-			. "<div class='Actions'>{$crawlButton->forTemplate()}</div>")
-		));
-
-		if($this->urlList()->getSpiderStatus() == "Complete") {
-			$urlsAsUL = "<ul>";
-			foreach(array_unique($this->urlList()->getProcessedURLs()) as $raw => $processed) {
-				if($raw == $processed) {
-					$urlsAsUL .= "<li>$processed</li>";
-				} else {
-					$urlsAsUL .= "<li>$processed <em>(was: $raw)</em></li>";
-				}
-			}
-			$urlsAsUL .= "</ul>";
-
-			$fields->addFieldToTab('Root.Crawl', 
-				new LiteralField('CrawlURLList', "<p>The following URLs have been identified:</p>" . $urlsAsUL)
-			);
-
-			
-		}
-
-		$fields->dataFieldByName("ExtraCrawlUrls")
-			->setDescription("Add URLs that are not reachable through content scraping, eg: '/about/team'. One per line")
-			->setTitle('Additional URLs');
-		$fields->dataFieldByName("UrlExcludePatterns")
-			->setDescription("URLs that should be excluded (support regular expression). eg: '/about/.*'. One per URL")
-			->setTitle('Excluded URLs');
-
-		return $fields;
-	}
-
-	public function onAfterWrite() {
-		parent::onAfterWrite();
-
-		$urlList = $this->urlList();
-		if($this->isChanged('UrlProcessor') && $urlList->hasCrawled()) {
-			if($processorClass = $this->UrlProcessor) {
-				$urlList->setUrlProcessor(new $processorClass);
-			} else {
-				$urlList->setUrlProcessor(null);
-			}
-			$urlList->reprocessUrls();
-		}
-	}
-
-
-	public function urlList() {
-		if(!$this->urlList) {
-			$this->urlList = new StaticSiteUrlList($this->BaseUrl, "../assets/static-site-" . $this->ID);
-			if($processorClass = $this->UrlProcessor) {
-				$this->urlList->setUrlProcessor(new $processorClass);
-			}
-			if($this->ExtraCrawlUrls) {
-				$extraCrawlUrls = preg_split('/\s+/', trim($this->ExtraCrawlUrls));
-				$this->urlList->setExtraCrawlUrls($extraCrawlUrls);
-			}
-			if($this->UrlExcludePatterns) {
-				$urlExcludePatterns = preg_split('/\s+/', trim($this->UrlExcludePatterns));
-				$this->urlList->setExcludePatterns($urlExcludePatterns);
-			}
- 		}
-		return $this->urlList;
-	}
-
-	/**
-	 * Crawl the target site
-	 * @return StaticSiteCrawler
-	 */
-	public function crawl($limit=false, $verbose=false) {
-		if(!$this->BaseUrl) throw new LogicException("Can't crawl a site until Base URL is set.");
-		return $this->urlList()->crawl($limit, $verbose);
-	}
-
-	public function getSchemaForURL($absoluteURL) {
-		// TODO: Return the right schema
-		return $this->Schemas()->First();
-	} 
-
-	/**
-	 * Returns a StaticSiteContentItem for the given URL.
-	 * Relative URLs are used as the unique identifiers by this importer
-	 * 
-	 * @param $id The URL, relative to BaseURL, starting with "/".
-	 * @return DataObject
-	 */
-	public function getObject($id) {
-
-		if($id[0] != "/") {
-			$id = $this->decodeId($id);
-			if($id[0] != "/") throw new InvalidArgumentException("\$id must start with /");
-		}
-
-		return new StaticSiteContentItem($this, $id);
-	}
-
-	public function getRoot() {
-		return $this->getObject('/');
-	}
-
-	public function allowedImportTargets() {
-		return array('sitetree' => true);
-	}
-
-	/**
-	 * Return the root node
-	 * @return ArrayList A list containing the root node
-	 */
-	public function stageChildren($showAll = false) {
-		if(!$this->urlList()->hasCrawled()) return new ArrayList;
-
-		return new ArrayList(array(
-			$this->getObject("/")
-		));
-
-	}
-
-	public function getContentImporter($target=null) {
-		return new StaticSiteImporter();
-	}
-
-	public function isValid() {
-		if(!(boolean)$this->BaseUrl) {
-			return false;
-		}
-		return true;
-	}
-	public function canImport($member = null) {
-		return $this->isValid();
-	}
-	public function canCreate($member = null) {
-		return true;
-	}
-
+class StaticSiteContentSource extends ExternalContentSource
+{
+
+    public static $db = array(
+        'BaseUrl' => 'Varchar(255)',
+        'UrlProcessor' => 'Varchar(255)',
+        'ExtraCrawlUrls' => 'Text',
+        'UrlExcludePatterns' => 'Text',
+    );
+
+    public static $has_many = array(
+        "Schemas" => "StaticSiteContentSource_ImportSchema",
+        "Pages" => "SiteTree",
+    );
+
+
+    /** @return FieldList Schema grid, URL-processor options, crawl status/actions and, once crawled, the URL list. */
+    public function getCMSFields()
+    {
+        $fields = parent::getCMSFields();
+
+        $importRules = $fields->dataFieldByName('Schemas');
+        $importRules->getConfig()->removeComponentsByType('GridFieldAddExistingAutocompleter');
+        $importRules->getConfig()->removeComponentsByType('GridFieldAddNewButton');
+        $addNewButton = new GridFieldAddNewButton('after');
+        $addNewButton->setButtonName("Add schema");
+        $importRules->getConfig()->addComponent($addNewButton);
+
+        $fields->removeFieldFromTab("Root", "Schemas");
+        $fields->removeFieldFromTab("Root", "Pages");
+        $fields->addFieldToTab("Root.Main", new LiteralField("", "<p>Each import rule will import content for a field"
+            . " by getting the results of a CSS selector.  If more than one rule exists for a field, then they will be"
+            . " processed in the order they appear.  The first rule that returns content will be the one used.</p>"));
+        $fields->addFieldToTab("Root.Main", $importRules);
+
+        $processingOptions = array("" => "No pre-processing");
+        foreach (ClassInfo::implementorsOf('StaticSiteUrlProcessor') as $processor) {
+            $processorObj = new $processor;
+            $processingOptions[$processor] = "<strong>" . Convert::raw2xml($processorObj->getName())
+                . "</strong><br>" . Convert::raw2xml($processorObj->getDescription());
+        }
+
+        $fields->addFieldToTab("Root.Main", new OptionsetField("UrlProcessor", "URL processing", $processingOptions));
+
+        switch ($this->urlList()->getSpiderStatus()) {
+            case "Not started":
+                $crawlButtonText = _t('StaticSiteContentSource.CRAWL_SITE', 'Crawl site');
+                break;
+
+            case "Partial":
+                $crawlButtonText = _t('StaticSiteContentSource.RESUME_CRAWLING', 'Resume crawling');
+                break;
+
+            case "Complete":
+                $crawlButtonText = _t('StaticSiteContentSource.RECRAWL_SITE', 'Re-crawl site');
+                break;
+
+            default:
+                throw new LogicException("Invalid getSpiderStatus() value '".$this->urlList()->getSpiderStatus()."'");
+        }
+
+        $crawlButton = FormAction::create('crawlsite', $crawlButtonText)
+            ->setAttribute('data-icon', 'arrow-circle-double')
+            ->setUseButtonTag(true);
+        $fields->addFieldsToTab('Root.Crawl', array(
+            new ReadonlyField("CrawlStatus", "Crawling Status", $this->urlList()->getSpiderStatus()),
+            new ReadonlyField("NumURLs", "Number of URLs", $this->urlList()->getNumURLs()),
+
+            new LiteralField('CrawlActions',
+            "<p>Before importing this content, all URLs on the site must be crawled (like a search engine does). Click"
+            . " the button below to do so:</p>"
+            . "<div class='Actions'>{$crawlButton->forTemplate()}</div>")
+        ));
+
+        if ($this->urlList()->getSpiderStatus() == "Complete") {
+            $urlsAsUL = "<ul>";
+            foreach (array_unique($this->urlList()->getProcessedURLs()) as $raw => $processed) {
+                // URLs come from the crawled remote site, so escape them before embedding them in markup.
+                $urlItem = Convert::raw2xml($processed);
+                if ($raw != $processed) {
+                    $urlItem .= " <em>(was: " . Convert::raw2xml($raw) . ")</em>";
+                }
+                $urlsAsUL .= "<li>$urlItem</li>";
+            }
+            $urlsAsUL .= "</ul>";
+
+            $fields->addFieldToTab('Root.Crawl',
+                new LiteralField('CrawlURLList', "<p>The following URLs have been identified:</p>" . $urlsAsUL)
+            );
+        }
+
+        $fields->dataFieldByName("ExtraCrawlUrls")
+            ->setDescription("Add URLs that are not reachable through content scraping, eg: '/about/team'. One per line")
+            ->setTitle('Additional URLs');
+        $fields->dataFieldByName("UrlExcludePatterns")
+            ->setDescription("URLs that should be excluded (support regular expression). eg: '/about/.*'. One per URL")
+            ->setTitle('Excluded URLs');
+
+        return $fields;
+    }
+
+    public function onAfterWrite()
+    {
+        parent::onAfterWrite();
+
+        $urlList = $this->urlList();
+        if ($this->isChanged('UrlProcessor') && $urlList->hasCrawled()) {
+            if ($processorClass = $this->UrlProcessor) {
+                $urlList->setUrlProcessor(new $processorClass);
+            } else {
+                // NOTE(review): confirm setUrlProcessor() accepts null; its old signature type-hinted StaticSiteUrlProcessor.
+                $urlList->setUrlProcessor(null);
+            }
+            $urlList->reprocessUrls();
+        }
+    }
+
+    public function urlList()
+    {
+        if (!$this->urlList) {
+            $this->urlList = new StaticSiteUrlList($this->BaseUrl, "../assets/static-site-" . $this->ID);
+            if ($processorClass = $this->UrlProcessor) {
+                $this->urlList->setUrlProcessor(new $processorClass);
+            }
+            if ($this->ExtraCrawlUrls) {
+                $extraCrawlUrls = preg_split('/\s+/', trim($this->ExtraCrawlUrls));
+                $this->urlList->setExtraCrawlUrls($extraCrawlUrls);
+            }
+            if ($this->UrlExcludePatterns) {
+                $urlExcludePatterns = preg_split('/\s+/', trim($this->UrlExcludePatterns));
+                $this->urlList->setExcludePatterns($urlExcludePatterns);
+            }
+        }
+        return $this->urlList;
+    }
+
+    /**
+     * Crawl the target site
+     * @return StaticSiteCrawler
+     */
+    public function crawl($limit=false, $verbose=false)
+    {
+        if (!$this->BaseUrl) {
+            throw new LogicException("Can't crawl a site until Base URL is set.");
+        }
+        return $this->urlList()->crawl($limit, $verbose);
+    }
+
+    public function getSchemaForURL($absoluteURL)
+    {
+        // TODO: Return the right schema
+        return $this->Schemas()->First();
+    }
+
+    /**
+     * Returns a StaticSiteContentItem for the given URL.
+     * Relative URLs are used as the unique identifiers by this importer
+     * @param $id The URL, relative to BaseURL, starting with "/"; any other value is passed through decodeId() first.
+     * @return StaticSiteContentItem
+     * @throws InvalidArgumentException If the ID still does not start with "/" after decodeId()
+     */
+    public function getObject($id)
+    {
+        if ($id[0] != "/") {
+            $id = $this->decodeId($id);
+            if ($id[0] != "/") {
+                throw new InvalidArgumentException("\$id must start with /");
+            }
+        }
+
+        return new StaticSiteContentItem($this, $id);
+    }
+
+    public function getRoot()
+    {
+        return $this->getObject('/');
+    }
+
+    public function allowedImportTargets()
+    {
+        return array('sitetree' => true);
+    }
+
+    /**
+     * Return the root node
+     * @return ArrayList A list containing the root node
+     */
+    public function stageChildren($showAll = false)
+    {
+        if (!$this->urlList()->hasCrawled()) {
+            return new ArrayList;
+        }
+
+        return new ArrayList(array(
+            $this->getObject("/")
+        ));
+    }
+
+    public function getContentImporter($target=null)
+    {
+        return new StaticSiteImporter();
+    }
+
+    public function isValid()
+    {
+        if (!(boolean)$this->BaseUrl) {
+            return false;
+        }
+        return true;
+    }
+    public function canImport($member = null)
+    {
+        return $this->isValid();
+    }
+    public function canCreate($member = null)
+    {
+        return true;
+    }
 }
 
 /**
  * A collection of ImportRules that apply to some or all of the pages being imported.
  */
-class StaticSiteContentSource_ImportSchema extends DataObject {
-	public static $db = array(
-		"DataType" => "Varchar", // classname
-		"Order" => "Int",
-		"AppliesTo" => "Varchar(255)", // regex
-	);
-	public static $summary_fields = array(
-		"AppliesTo",
-		"DataType",
-		"Order",
-	);
-	public static $field_labels = array(
-		"AppliesTo" => "URLs applied to",
-		"DataType" => "Data type",
-		"Order" => "Priority",
-	);
-
-	public static $default_sort = "Order";
-
-	public static $has_one = array(
-		"ContentSource" => "StaticSiteContentSource",
-	);
-
-	public static $has_many = array(
-		"ImportRules" => "StaticSiteContentSource_ImportRule",
-	);
-
-	public function getTitle() {
-		return $this->DataType.' ('.$this->AppliesTo.')';
-	}
-
-	/**
-	 * 
-	 * @return FieldList
-	 */
-	public function getCMSFields() {
-		$fields = parent::getCMSFields();
-		$fields->removeFieldFromTab('Root.Main', 'DataType');
-		$fields->removeByName('ContentSourceID');
-		$dataObjects = ClassInfo::subclassesFor('DataObject');
-		array_shift($dataObjects);
-		natcasesort($dataObjects);
-		$fields->addFieldToTab('Root.Main', new DropdownField('DataType', 'DataType', $dataObjects));
-
-		$importRules = $fields->dataFieldByName('ImportRules');
-		if($importRules) {
-			$importRules->getConfig()->removeComponentsByType('GridFieldAddExistingAutocompleter');
-			$importRules->getConfig()->removeComponentsByType('GridFieldAddNewButton');
-			$addNewButton = new GridFieldAddNewButton('after');
-			$addNewButton->setButtonName("Add Rule");
-			$importRules->getConfig()->addComponent($addNewButton);
-
-			$fields->removeFieldFromTab('Root', 'ImportRules');
-			$fields->addFieldToTab('Root.Main', $importRules);
-		}
-
-		return $fields;
-	}
-
-	public function requireDefaultRecords() {
-		foreach(StaticSiteContentSource::get() as $source) {
-			if(!$source->Schemas()->count()) {
-				Debug::message("Making a schema for $source->ID");
-				$defaultSchema = new StaticSiteContentSource_ImportSchema;
-				$defaultSchema->Order = 1000000;
-				$defaultSchema->AppliesTo = ".*";
-				$defaultSchema->DataType = "Page";
-				$defaultSchema->ContentSourceID = $source->ID;
-				$defaultSchema->write();
-
-
-				foreach(StaticSiteContentSource_ImportRule::get()->filter(array('SchemaID' => 0)) as $rule) {
-					$rule->SchemaID = $defaultSchema->ID;
-					$rule->write();
-				}
-			}
-		}
-	}
-
-	/**
-	 * Return the import rules in a format suitable for configuring StaticSiteContentExtractor.
-	 * 
-	 * @return array A map of field name => array(CSS selector, CSS selector, ...)
-	 */
-	public function getImportRules() {
-		$output = array();
-
-		foreach($this->ImportRules() as $rule) {
-			if(!isset($output[$rule->FieldName])) $output[$rule->FieldName] = array();
-			$ruleArray = array(
-				'selector' => $rule->CSSSelector,
-				'attribute' => $rule->Attribute,
-				'plaintext' => $rule->PlainText,
-				'excludeselectors' => preg_split('/\s+/', trim($rule->ExcludeCSSSelector)),
-				'outerhtml' => $rule->OuterHTML,
-			);
-			$output[$rule->FieldName][] = $ruleArray;
-		}
-
-		return $output;
-	}
-
+class StaticSiteContentSource_ImportSchema extends DataObject
+{
+    public static $db = array(
+        "DataType" => "Varchar", // classname
+        "Order" => "Int",
+        "AppliesTo" => "Varchar(255)", // regex
+    );
+    public static $summary_fields = array(
+        "AppliesTo",
+        "DataType",
+        "Order",
+    );
+    public static $field_labels = array(
+        "AppliesTo" => "URLs applied to",
+        "DataType" => "Data type",
+        "Order" => "Priority",
+    );
+
+    public static $default_sort = "Order";
+
+    public static $has_one = array(
+        "ContentSource" => "StaticSiteContentSource",
+    );
+
+    public static $has_many = array(
+        "ImportRules" => "StaticSiteContentSource_ImportRule",
+    );
+
+    public function getTitle()
+    {
+        return $this->DataType.' ('.$this->AppliesTo.')';
+    }
+
+    /**
+     * Build the CMS fields: a DataType dropdown of DataObject subclasses plus the ImportRules grid on the Main tab.
+     * @return FieldList
+     */
+    public function getCMSFields()
+    {
+        $fields = parent::getCMSFields();
+        $fields->removeFieldFromTab('Root.Main', 'DataType');
+        $fields->removeByName('ContentSourceID');
+        $dataObjects = ClassInfo::subclassesFor('DataObject');
+        array_shift($dataObjects);
+        natcasesort($dataObjects);
+        $fields->addFieldToTab('Root.Main', new DropdownField('DataType', 'DataType', $dataObjects));
+
+        $importRules = $fields->dataFieldByName('ImportRules');
+        if ($importRules) {
+            $importRules->getConfig()->removeComponentsByType('GridFieldAddExistingAutocompleter');
+            $importRules->getConfig()->removeComponentsByType('GridFieldAddNewButton');
+            $addNewButton = new GridFieldAddNewButton('after');
+            $addNewButton->setButtonName("Add Rule");
+            $importRules->getConfig()->addComponent($addNewButton);
+
+            $fields->removeFieldFromTab('Root', 'ImportRules');
+            $fields->addFieldToTab('Root.Main', $importRules);
+        }
+
+        return $fields;
+    }
+
+    public function requireDefaultRecords()
+    {
+        foreach (StaticSiteContentSource::get() as $source) {
+            if (!$source->Schemas()->count()) {
+                Debug::message("Making a schema for $source->ID");
+                $defaultSchema = new StaticSiteContentSource_ImportSchema;
+                $defaultSchema->Order = 1000000;
+                $defaultSchema->AppliesTo = ".*";
+                $defaultSchema->DataType = "Page";
+                $defaultSchema->ContentSourceID = $source->ID;
+                $defaultSchema->write();
+
+
+                foreach (StaticSiteContentSource_ImportRule::get()->filter(array('SchemaID' => 0)) as $rule) {
+                    $rule->SchemaID = $defaultSchema->ID;
+                    $rule->write();
+                }
+            }
+        }
+    }
+
+    /**
+     * Return the import rules in a format suitable for configuring StaticSiteContentExtractor.
+     * Each rule is an array with keys: selector, attribute, plaintext, excludeselectors (array), outerhtml.
+     * @return array A map of field name => array(rule, rule, ...), in the order ImportRules() yields them
+     */
+    public function getImportRules()
+    {
+        $output = array();
+
+        foreach ($this->ImportRules() as $rule) {
+            if (!isset($output[$rule->FieldName])) {
+                $output[$rule->FieldName] = array();
+            }
+            $ruleArray = array(
+                'selector' => $rule->CSSSelector,
+                'attribute' => $rule->Attribute,
+                'plaintext' => $rule->PlainText,
+                'excludeselectors' => preg_split('/\s+/', trim($rule->ExcludeCSSSelector)),
+                'outerhtml' => $rule->OuterHTML,
+            );
+            $output[$rule->FieldName][] = $ruleArray;
+        }
+
+        return $output;
+    }
 }
 
 /**
  * A single import rule that forms part of an ImportSchema
  */
-class StaticSiteContentSource_ImportRule extends DataObject {
-	public static $db = array(
-		"FieldName" => "Varchar",
-		"CSSSelector" => "Text",
-		"ExcludeCSSSelector" => "Text",
-		"Attribute" => "Varchar",
-		"PlainText" => "Boolean",
-		"OuterHTML" => "Boolean",
-	);
-
-	public static $summary_fields = array(
-		"FieldName",
-		"CSSSelector",
-		"Attribute",
-		"PlainText",
-		"OuterHTML",
-	);
-
-	public static $field_labels = array(
-		"FieldName" => "Field Name",
-		"CSSSelector" => "CSS Selector",
-		"Attribute" => "Element attribute",
-		"PlainText" => "Convert to plain text",
-		"OuterHTML" => "Use the outer HTML",
-	);
-
-	public static $has_one = array(
-		"Schema" => "StaticSiteContentSource_ImportSchema",
-	);
-
-	public function getTitle() {
-		return ($this->FieldName)?$this->FieldName:$this->ID;
-	}
-
-	/**
-	 *
-	 * @return FieldList
-	 */
-	public function getCMSFields() {
-		$fields = parent::getCMSFields();
-
-		$dataType = $this->Schema()->DataType;
-		if($dataType) {
-			$fieldList = singleton($dataType)->inheritedDatabaseFields();
-			$fieldList = array_combine(array_keys($fieldList),array_keys($fieldList));
-			unset($fieldList->ParentID);
-			unset($fieldList->WorkflowDefinitionID);
-			unset($fieldList->Version);
-
-			$fieldNameField = new DropdownField("FieldName", "Field Name", $fieldList);
-			$fieldNameField->setEmptyString("(choose)");
-			$fields->insertBefore($fieldNameField, "CSSSelector");
-		} else {
-			$fields->replaceField('FieldName', $fieldName = new ReadonlyField("FieldName", "Field Name"));
-			$fieldName->setDescription('Save this rule before being able to add a field name');
-		}
-
-		return $fields;
-	}
-}
\ No newline at end of file
+class StaticSiteContentSource_ImportRule extends DataObject
+{
+    public static $db = array(
+        "FieldName" => "Varchar",
+        "CSSSelector" => "Text",
+        "ExcludeCSSSelector" => "Text",
+        "Attribute" => "Varchar",
+        "PlainText" => "Boolean",
+        "OuterHTML" => "Boolean",
+    );
+
+    public static $summary_fields = array(
+        "FieldName",
+        "CSSSelector",
+        "Attribute",
+        "PlainText",
+        "OuterHTML",
+    );
+
+    public static $field_labels = array(
+        "FieldName" => "Field Name",
+        "CSSSelector" => "CSS Selector",
+        "Attribute" => "Element attribute",
+        "PlainText" => "Convert to plain text",
+        "OuterHTML" => "Use the outer HTML",
+    );
+
+    public static $has_one = array(
+        "Schema" => "StaticSiteContentSource_ImportSchema",
+    );
+
+    public function getTitle()
+    {
+        return ($this->FieldName)?$this->FieldName:$this->ID;
+    }
+
+    /**
+     * Build the CMS fields, offering FieldName as a dropdown of the schema DataType's database fields.
+     * @return FieldList
+     */
+    public function getCMSFields()
+    {
+        $fields = parent::getCMSFields();
+
+        $dataType = $this->Schema()->DataType;
+        if ($dataType) {
+            $fieldList = singleton($dataType)->inheritedDatabaseFields();
+            $fieldList = array_combine(array_keys($fieldList), array_keys($fieldList));
+            // $fieldList is a plain array, so entries must be dropped by key; the previous
+            // property-style unset($fieldList->X) was a silent no-op and left these in the list.
+            unset($fieldList['ParentID'], $fieldList['WorkflowDefinitionID'], $fieldList['Version']);
+
+            $fieldNameField = new DropdownField("FieldName", "Field Name", $fieldList);
+            $fieldNameField->setEmptyString("(choose)");
+            $fields->insertBefore($fieldNameField, "CSSSelector");
+        } else {
+            $fields->replaceField('FieldName', $fieldName = new ReadonlyField("FieldName", "Field Name"));
+            $fieldName->setDescription('Save this rule before being able to add a field name');
+        }
+
+        return $fields;
+    }
+}
diff --git a/code/StaticSiteDataExtension.php b/code/StaticSiteDataExtension.php
index effd33e..8f78b59 100644
--- a/code/StaticSiteDataExtension.php
+++ b/code/StaticSiteDataExtension.php
@@ -1,16 +1,18 @@
 <?php
 
-class StaticSiteDataExtension extends DataExtension {
-	static $has_one = array(
-		"StaticSiteContentSource" => "StaticSiteContentSource",
-	);
-	static $db = array(
-		"StaticSiteURL" => "Varchar(255)",
-	);
+class StaticSiteDataExtension extends DataExtension
+{
+    public static $has_one = array(
+        "StaticSiteContentSource" => "StaticSiteContentSource",
+    );
+    public static $db = array(
+        "StaticSiteURL" => "Varchar(255)",
+    );
 
-	function updateCMSFields(FieldList $fields) {
-		if($this->owner->StaticSiteContentSourceID && $this->owner->StaticSiteURL) {
-			$fields->addFieldToTab('Root.Main', new ReadonlyField('StaticSiteURL', 'Imported URL'), 'MenuTitle');
-		}
-	}
-}
\ No newline at end of file
+    public function updateCMSFields(FieldList $fields)
+    {
+        if ($this->owner->StaticSiteContentSourceID && $this->owner->StaticSiteURL) {
+            $fields->addFieldToTab('Root.Main', new ReadonlyField('StaticSiteURL', 'Imported URL'), 'MenuTitle');
+        }
+    }
+}
diff --git a/code/StaticSiteExternalContentAdminExtension.php b/code/StaticSiteExternalContentAdminExtension.php
index b147aa4..891d7c8 100644
--- a/code/StaticSiteExternalContentAdminExtension.php
+++ b/code/StaticSiteExternalContentAdminExtension.php
@@ -1,35 +1,37 @@
 <?php
 
-class StaticSiteExternalContentAdminExtension extends Extension {
-	static $allowed_actions = array(
-		"crawlsite",
-	);
+class StaticSiteExternalContentAdminExtension extends Extension
+{
+    public static $allowed_actions = array(
+        "crawlsite",
+    );
 
-	public function crawlsite($request) {
-		$selected = isset($request['ID']) ? $request['ID'] : 0;
-		if(!$selected){
-			$messageType = 'bad';
-			$message = _t('ExternalContent.NOITEMSELECTED', 'No item selected to crawl.');
-		
-		} else {
-			$source = ExternalContent::getDataObjectFor($selected);
-			if (!($source instanceof ExternalContentSource)) $source = $from->getSource();
+    public function crawlsite($request)
+    {
+        $selected = isset($request['ID']) ? $request['ID'] : 0;
+        if (!$selected) {
+            $messageType = 'bad';
+            $message = _t('ExternalContent.NOITEMSELECTED', 'No item selected to crawl.');
+        } else {
+            $source = ExternalContent::getDataObjectFor($selected);
+            if (!($source instanceof ExternalContentSource)) {
+                $source = $source->getSource(); // was $from->getSource(), but $from is never defined here
+            }
 
-			$messageType = 'good';
-			$message = _t('ExternalContent.CONTENTMIGRATED', 'Crawling successful.');
+            $messageType = 'good';
+            $message = _t('ExternalContent.CONTENTMIGRATED', 'Crawling successful.');
 
-			try {
-				$source->crawl();
-			} catch(Exception $e) {
-				$messageType = 'bad';
-				$message = "Error crawling: " . $e->getMessage();
-			}
+            try {
+                $source->crawl();
+            } catch (Exception $e) {
+                $messageType = 'bad';
+                $message = "Error crawling: " . $e->getMessage();
+            }
+        }
 
-		}
+        Session::set("FormInfo.Form_EditForm.formError.message", $message);
+        Session::set("FormInfo.Form_EditForm.formError.type", $messageType);
 
-		Session::set("FormInfo.Form_EditForm.formError.message", $message);
-		Session::set("FormInfo.Form_EditForm.formError.type", $messageType);
-
-		return $this->owner->getResponseNegotiator()->respond($this->owner->getRequest());	
-	}
-}
\ No newline at end of file
+        return $this->owner->getResponseNegotiator()->respond($this->owner->getRequest());
+    }
+}
diff --git a/code/StaticSiteImporter.php b/code/StaticSiteImporter.php
index 7ba5493..ac9d2c8 100644
--- a/code/StaticSiteImporter.php
+++ b/code/StaticSiteImporter.php
@@ -1,12 +1,14 @@
 <?php
 
-class StaticSiteImporter extends ExternalContentImporter {
-	public function __construct() {
-		$this->contentTransforms['sitetree'] = new StaticSitePageTransformer();
-	}
+class StaticSiteImporter extends ExternalContentImporter
+{
+    public function __construct()
+    {
+        $this->contentTransforms['sitetree'] = new StaticSitePageTransformer();
+    }
 
-	public function getExternalType($item) {
-		return "sitetree";
-	}
-
-}
\ No newline at end of file
+    public function getExternalType($item)
+    {
+        return "sitetree";
+    }
+}
diff --git a/code/StaticSiteLinkRewriter.php b/code/StaticSiteLinkRewriter.php
index c2acb2e..4e88254 100644
--- a/code/StaticSiteLinkRewriter.php
+++ b/code/StaticSiteLinkRewriter.php
@@ -5,65 +5,70 @@
 /**
  * Helper class for rewriting links using phpQuery.
  */
-class StaticSiteLinkRewriter {
+class StaticSiteLinkRewriter
+{
 
-	protected $tagMap = array(
-		'a' => array('href'),
-		'img' => array('src'),
-	);
+    protected $tagMap = array(
+        'a' => array('href'),
+        'img' => array('src'),
+    );
 
-	protected $callback;
+    protected $callback;
 
-	function __construct($callback) {
-		$this->callback = $callback;
-	}
+    public function __construct($callback)
+    {
+        $this->callback = $callback;
+    }
 
-	/**
-	 * Set a map of tags & attributes to search for URls.
-	 * 
-	 * Each key is a tagname, and each value is an array of attribute names.
-	 */
-	function setTagMap($tagMap) {
-		$this->tagMap = $tagMap;
-	}
+    /**
+     * Set a map of tags & attributes to search for URls.
+     * 
+     * Each key is a tagname, and each value is an array of attribute names.
+     */
+    public function setTagMap($tagMap)
+    {
+        $this->tagMap = $tagMap;
+    }
 
-	/**
-	 * Return the tagmap
-	 */
-	function getTagMap($tagMap) {
-		$this->tagMap = $tagMap;
-	}
+    /**
+     * Return the tagmap (tag name => attribute names); $tagMap is ignored, kept only for old callers.
+     */
+    public function getTagMap($tagMap = null)
+    {
+        return $this->tagMap;
+    }
 
-	/**
-	 * Rewrite URLs in a PHPQuery object.  The content of the object will be modified.
-	 * 
-	 * @param  phpQuery $pq The content containing the links to rewrite
-	 */
-	function rewriteInPQ($pq) {
-		$callback = $this->callback;
+    /**
+     * Rewrite URLs in a PHPQuery object.  The content of the object will be modified.
+     * 
+     * @param  phpQuery $pq The content containing the links to rewrite
+     */
+    public function rewriteInPQ($pq)
+    {
+        $callback = $this->callback;
 
-		// Make URLs absolute
-		foreach($this->tagMap as $tag => $attributes) {
-			foreach($pq[$tag] as $tagObj) {
-				foreach($attributes as $attribute) {
-					if($url = pq($tagObj)->attr($attribute)) {
-						$newURL = $callback($url);
-						pq($tagObj)->attr($attribute, $newURL);
-					}
-				}
-			}
-		}
-	}
+        // Make URLs absolute
+        foreach ($this->tagMap as $tag => $attributes) {
+            foreach ($pq[$tag] as $tagObj) {
+                foreach ($attributes as $attribute) {
+                    if ($url = pq($tagObj)->attr($attribute)) {
+                        $newURL = $callback($url);
+                        pq($tagObj)->attr($attribute, $newURL);
+                    }
+                }
+            }
+        }
+    }
 
-	/**
-	 * Rewrite URLs in the given content snippet.  Returns the updated content.
-	 * 
-	 * @param  phpQuery $pq The content containing the links to rewrite
-	 */
-	function rewriteInContent($content) {
-		$pq = phpQuery::newDocument($content);
-		$this->rewriteInPQ($pq);
-		return $pq->html();
-	}
-
-}
\ No newline at end of file
+    /**
+     * Rewrite URLs in the given content snippet.  Returns the updated content.
+     * @param  string $content The content containing the links to rewrite
+     * @return string The html() output of the phpQuery document after rewriting
+     */
+    public function rewriteInContent($content)
+    {
+        $pq = phpQuery::newDocument($content);
+        $this->rewriteInPQ($pq);
+        return $pq->html();
+    }
+}
diff --git a/code/StaticSitePageTransformer.php b/code/StaticSitePageTransformer.php
index efc39bc..9494b1c 100644
--- a/code/StaticSitePageTransformer.php
+++ b/code/StaticSitePageTransformer.php
@@ -1,91 +1,94 @@
 <?php
 
-class StaticSitePageTransformer implements ExternalContentTransformer {
-
-	public function transform($item, $parentObject, $duplicateStrategy) {
-		if(Director::is_cli()) {
-			Debug::message("Parent: #$parentObject->ID, $parentObject->Title");
-			Debug::message($item->AbsoluteURL);
-		}
-
-		// Sleep for 100ms to reduce load on the remote server
-		usleep(100*1000);
-
-		// Extract content from the page
-		$contentFields = $this->getContentFieldsAndSelectors($item);
-
-		// Default value for Title
-		if(empty($contentFields['Title'])) {
-			$contentFields['Title'] = array('content' => $item->Name);
-		}
-
-		// Default value for URL segment
-		if(empty($contentFields['URLSegment'])) {
-			$urlSegment = str_replace('/','', $item->Name);
-			$urlSegment = preg_replace('/\.[^.]*$/','',$urlSegment);
-			$urlSegment = str_replace('.','-', $item->Name);
-			$contentFields['URLSegment'] = array('content' => $urlSegment);
-		}
-
-		$schema = $item->getSource()->getSchemaForURL($item->AbsoluteURL);
-
-		$pageType = $schema->DataType;
-
-		if(!$pageType) {
-			throw new Exception('Pagetype for migration schema is empty!');
-		}
-
-		// Create a page with the appropriate fields
-		$page = new $pageType(array());
-		$existingPage = SiteTree::get_by_link($item->getExternalId());
-
-		if($existingPage && $duplicateStrategy === 'Overwrite') {
-			if(get_class($existingPage) !== $pageType) {
-				$existingPage->ClassName = $pageType;
-				$existingPage->write();
-			}
-			if($existingPage) {
-				$page = $existingPage;
-			}
-		}
-
-		$page->StaticSiteContentSourceID = $item->getSource()->ID;
-		$page->StaticSiteURL = $item->AbsoluteURL;
-
-		$page->ParentID = $parentObject ? $parentObject->ID : 0;
-
-		foreach($contentFields as $k => $v) {
-			$page->$k = $v['content'];
-		}
-
-		$page->write();
-
-		if(Director::is_cli()) {
-			Debug::message("#$page->Title");
-			Debug::message("#$page->ID child of #$page->ID");
-		}
-
-		return new TransformResult($page, $item->stageChildren());
-	}
-
-	/**
-	 * Get content from the remote host
-	 * 
-	 * @param  StaticSiteeContentItem $item The item to extract
-	 * @return array A map of field name => array('selector' => selector, 'content' => field content)
-	 */
-	public function getContentFieldsAndSelectors($item) {
-		// Get the import rules from the content source
-		$importSchema = $item->getSource()->getSchemaForURL($item->AbsoluteURL);
-		if(!$importSchema) {
-			return null;
-			throw new LogicException("Couldn't find an import schema for $item->AbsoluteURL");
-		}
-		$importRules = $importSchema->getImportRules();
-
- 		// Extract from the remote page based on those rules
-		$contentExtractor = new StaticSiteContentExtractor($item->AbsoluteURL);
-
-		return $contentExtractor->extractMapAndSelectors($importRules);
-	}
-}
\ No newline at end of file
+class StaticSitePageTransformer implements ExternalContentTransformer
+{
+
+    /**
+     * Import one crawled item as a page under $parentObject, filling its fields from the extracted content.
+     * @return TransformResult Wraps the written page and the item's stageChildren()
+     * @throws Exception If no schema DataType is available for the item's URL
+     */
+    public function transform($item, $parentObject, $duplicateStrategy)
+    {
+        if (Director::is_cli()) {
+            Debug::message("Parent: #$parentObject->ID, $parentObject->Title");
+            Debug::message($item->AbsoluteURL);
+        }
+
+        // Sleep for 100ms to reduce load on the remote server
+        usleep(100*1000);
+
+        // Extract content from the page
+        $contentFields = $this->getContentFieldsAndSelectors($item);
+
+        // Default value for Title
+        if (empty($contentFields['Title'])) {
+            $contentFields['Title'] = array('content' => $item->Name);
+        }
+
+        // Default URL segment: strip slashes, drop the file extension, then turn remaining dots into dashes
+        if (empty($contentFields['URLSegment'])) {
+            $urlSegment = str_replace('/', '', $item->Name);
+            $urlSegment = preg_replace('/\.[^.]*$/', '', $urlSegment);
+            $urlSegment = str_replace('.', '-', $urlSegment);
+            $contentFields['URLSegment'] = array('content' => $urlSegment);
+        }
+
+        $schema = $item->getSource()->getSchemaForURL($item->AbsoluteURL);
+        $pageType = $schema ? $schema->DataType : null;
+        if (!$pageType) {
+            throw new Exception('Pagetype for migration schema is empty!');
+        }
+
+        // Create a page with the appropriate fields
+        $page = new $pageType(array());
+        $existingPage = SiteTree::get_by_link($item->getExternalId());
+
+        if ($existingPage && $duplicateStrategy === 'Overwrite') {
+            if (get_class($existingPage) !== $pageType) {
+                $existingPage->ClassName = $pageType;
+                $existingPage->write();
+            }
+            $page = $existingPage;
+        }
+
+        $page->StaticSiteContentSourceID = $item->getSource()->ID;
+        $page->StaticSiteURL = $item->AbsoluteURL;
+        $page->ParentID = $parentObject ? $parentObject->ID : 0;
+
+        foreach ($contentFields as $k => $v) {
+            $page->$k = $v['content'];
+        }
+
+        $page->write();
+
+        if (Director::is_cli()) {
+            Debug::message("#$page->Title");
+            Debug::message("#$page->ID child of #$page->ParentID");
+        }
+
+        return new TransformResult($page, $item->stageChildren());
+    }
+
+    /**
+     * Get content from the remote host, using the import schema the item's source returns for its URL.
+     * @param  StaticSiteContentItem $item The item to extract
+     * @return array|null A map of field name => array('selector' => selector, 'content' => field content),
+     *                    or null when no import schema is found for the item's URL
+     */
+    public function getContentFieldsAndSelectors($item)
+    {
+        // Get the import rules from the content source
+        $importSchema = $item->getSource()->getSchemaForURL($item->AbsoluteURL);
+        if (!$importSchema) {
+            // A missing schema yields null; the LogicException that used to follow this return could never run.
+            return null;
+        }
+        $importRules = $importSchema->getImportRules();
+
+        // Extract from the remote page based on those rules
+        $contentExtractor = new StaticSiteContentExtractor($item->AbsoluteURL);
+
+        return $contentExtractor->extractMapAndSelectors($importRules);
+    }
+}
diff --git a/code/StaticSiteUrlList.php b/code/StaticSiteUrlList.php
index 69c0780..6e40fff 100644
--- a/code/StaticSiteUrlList.php
+++ b/code/StaticSiteUrlList.php
@@ -7,532 +7,606 @@
  *
  * Makes use of PHPCrawl to prepare a list of URLs on the site
  */
-class StaticSiteUrlList {
-	protected $baseURL, $cacheDir;
-
-	/**
-	 * Two element array: contains keys 'inferred' and 'regular':
-	 *  - 'regular' is an array mapping raw URLs to processed URLs
-	 *  - 'inferred' is an array of inferred URLs
-	 */
-	protected $urls = null;
-
-	protected $autoCrawl = false;
-
-	protected $urlProcessor = null;
-
-	protected $extraCrawlURLs = null;
-
-	/**
-	 * A list of regular expression patterns to exclude from scraping
-	 *
-	 * @var array
-	 */
-	protected $excludePatterns = array();
-
-	/**
-	 * Create a new URL List
-	 * @param string $baseURL  The Base URL to find links on
-	 * @param string $cacheDir The local path to cache data into
-	 */
-	function __construct($baseURL, $cacheDir) {
-		// baseURL mus not have a trailing slash
-		if(substr($baseURL,-1) == "/") $baseURL = substr($baseURL,0,-1);
-		// cacheDir must have a trailing slash
-		if(substr($cacheDir,-1) != "/") $cacheDir .= "/";
-
-		$this->baseURL = $baseURL;
-		$this->cacheDir = $cacheDir;
-	}
-
-	/**
-	 * Set a URL processor for this URL List.
-	 *
-	 * URL processors process the URLs before the site heirarchy and inferred meta-data are generated.
-	 * These can be used to tranform URLs from CMSes that don't provide a natural heirarchy into something
-	 * more useful.
-	 *
-	 * See {@link StaticSiteMOSSURLProcessor} for an example.
-	 * 
-	 * @param StaticSiteUrlProcessor $urlProcessor [description]
-	 */
-	function setUrlProcessor(StaticSiteUrlProcessor $urlProcessor) {
-		$this->urlProcessor = $urlProcessor;
-	}
-
-	/**
-	 * Define additional crawl URLs as an array
-	 * Each of these URLs will be crawled in addition the base URL.
-	 * This can be helpful if pages are getting missed by the crawl
-	 */
-	function setExtraCrawlURls($extraCrawlURLs) {
-		$this->extraCrawlURLs = $extraCrawlURLs;
-	}
-
-	/**
-	 * Return the additional crawl URLs as an array
-	 */
-	function getExtraCrawlURLs() {
-		return $this->extraCrawlURLs;
-	}
-
-	/**
-	 * Set an array of regular expression patterns that should be excluded from
-	 * being added to the url list
-	 *
-	 * @param array $excludePatterns
-	 */
-	public function setExcludePatterns(array $excludePatterns) {
-		$this->excludePatterns = $excludePatterns;
-	}
-
-	/**
-	 * Get an array of regular expression patterns that should not be added to
-	 * the url list
-	 *
-	 * @return array
-	 */
-	public function getExcludePatterns() {
-		return $this->excludePatterns;
-	}
-
-	/**
-	 * 
-	 * Set whether the crawl should be triggered on demand.
-	 * @param [type] $autoCrawl [description]
-	 */
-	public function setAutoCrawl($autoCrawl) {
-		$this->autoCrawl = $autoCrawl;
-	}
-
-	/**
-	 * Returns the status of the spidering: "Complete", "Partial", or "Not started"
-	 * @return [type] [description]
-	 */
-	public function getSpiderStatus() {
-		if(file_exists($this->cacheDir . 'urls')) {
-			if(file_exists($this->cacheDir . 'crawlerid')) return "Partial";
-			else return "Complete";
-
-		} else {
-			return "Not started";
-		}
-	}
-
-	/**
-	 * Return the number of URLs crawled so far
-	 */
-	public function getNumURLs() {
-		if($this->urls) {
-			$urls = $this->urls;
-		// Don't rely on loadUrls() as it chokes on partially completed imports
-		} else if(file_exists($this->cacheDir . 'urls')) {
-			$urls = unserialize(file_get_contents($this->cacheDir . 'urls'));
-		} else {
-			return null;
-		}
-
-		return sizeof(array_unique($urls['regular'])) + sizeof($urls['inferred']);
-	}
-
-	/**
-	 * Return the raw URLs as an array
-	 * @return array
-	 */
-	public function getRawURLs() {
-		if($urls = $this->getProcessedURLs()) {
-			return array_keys($urls);
-		}
-	}
-
-	/**
-	 * Return a map of URLs crawled, with raw URLs as keys and processed URLs as values
-	 * @return array
-	 */
-	public function getProcessedURLs() {
-		if($this->hasCrawled() || $this->autoCrawl) {
-			if($this->urls === null) $this->loadUrls();
-			return array_merge(
-				$this->urls['regular'],
-				$this->urls['inferred'] ? array_combine($this->urls['inferred'], $this->urls['inferred']) : array()
-			);
-		}
-	}
-
-	public function hasCrawled() {
-		// There are URLs and we're not in the middle of a crawl
-		return file_exists($this->cacheDir . 'urls') && !file_exists($this->cacheDir . 'crawlerid');
-	}
-
-	/**
-	 * Load the URLs, either by crawling, or by fetching from cache
-	 * @return void
-	 */
-	public function loadUrls() {
-		if($this->hasCrawled()) {
-			$this->urls = unserialize(file_get_contents($this->cacheDir . 'urls'));
-			// Clear out obsolete format
-			if(!isset($this->urls['regular']) || !isset($this->urls['inferred'])) {
-				$this->urls = array('regular' => array(), 'inferred' => array());
-			}
-
-		} else if($this->autoCrawl) {
-			$this->crawl();
-
-		} else {
-			throw new LogicException("Crawl hasn't been executed yet, and autoCrawl is set to false");
-		}
-	}
-
-	/**
-	 * Re-execute the URL processor on all the fetched URLs
-	 * @return void
-	 */
-	public function reprocessUrls() {
-		if($this->urls === null) $this->loadUrls();
-
-		// Clear out all inferred URLs; these will be added
-		$this->urls['inferred'] = array();
-
-		// Reprocess URLs, in case the processing has changed since the last crawl
-		foreach($this->urls['regular'] as $url => $oldProcessed) {
-			$processedURL = $this->generateProcessedURL($url);
-			$this->urls['regular'][$url] = $processedURL;
-
-			// Trigger parent URL back-filling on new processed URL
-			$this->parentProcessedURL($processedURL);
-		}
-
-		$this->saveURLs();
-	}
-
-	/**
-	 *
-	 * @param int $limit
-	 * @param bool $verbose
-	 * @return \StaticSiteCrawler
-	 */
-	public function crawl($limit=false, $verbose=false) {
-		increase_time_limit_to(3600);
-
-		if(!is_dir($this->cacheDir)) mkdir($this->cacheDir);
-
-		$crawler = new StaticSiteCrawler($this, $limit, $verbose);
-		$crawler->enableResumption();
-		$crawler->setUrlCacheType(PHPCrawlerUrlCacheTypes::URLCACHE_SQLITE);
-		$crawler->setWorkingDirectory($this->cacheDir);
-
-		// Allow for resuming an incomplete crawl
-		if(file_exists($this->cacheDir.'crawlerid')) {
-			// We should re-load the partial list of URLs, if relevant
-			// This should only happen when we are resuming a partial crawl
-			if(file_exists($this->cacheDir . 'urls')) {
-				$this->urls = unserialize(file_get_contents($this->cacheDir . 'urls'));
-			} else {
-				$this->urls = array('regular' => array(), 'inferred' => array());
-			}
-			
-			$crawlerID = file_get_contents($this->cacheDir.'crawlerid');
-			$crawler->resume($crawlerID);
-		} else {
-			$crawlerID = $crawler->getCrawlerId();
-			file_put_contents($this->cacheDir.'/crawlerid', $crawlerID);
-			$this->urls = array('regular' => array(), 'inferred' => array());
-		}
-
-		$crawler->setURL($this->baseURL);
-		$crawler->go();
-
-		unlink($this->cacheDir.'crawlerid');
-
-		ksort($this->urls['regular']);
-		ksort($this->urls['inferred']);
-		$this->saveURLs();
-		return $crawler;
-	}
-
-	/**
-	 * Save the current list of URLs to disk
-	 * @return [type] [description]
-	 */
-	function saveURLs() {
-		file_put_contents($this->cacheDir . 'urls', serialize($this->urls));
-	}
-
-	/**
-	 * Add a URL to this list, given the absolute URL
-	 * @param string $url The absolute URL
-	 */
-	function addAbsoluteURL($url) {
-		$simpifiedURL = $this->simplifyURL($url);
-		$simpifiedBase = $this->simplifyURL($this->baseURL);
-
-		if(substr($simpifiedURL,0,strlen($simpifiedBase)) == $simpifiedBase) {
-			$relURL = substr($url, strlen($this->baseURL));
-		} else {
-			throw new InvalidArgumentException("URL $url is not from the site $this->baseURL");
-		}
-
-		return $this->addURL($relURL);
-	}
-
-	function addURL($url) {
-		if($this->urls === null) $this->loadUrls();
-
-		// Generate and save the processed URLs
-		$this->urls['regular'][$url] = $this->generateProcessedURL($url);
-
-		// Trigger parent URL back-filling
-		$this->parentProcessedURL($this->urls['regular'][$url]);
-	}
-
-
-	/**
-	 * Add an inferred URL to the list.
-	 * 
-	 * Since the unprocessed URL isn't available, we use the processed URL in its place.  This should be used with
-	 * some caution.
-	 * 
-	 * @param string $processedURL The processed URL to add.
-	 */
-	function addInferredURL($inferredURL) {
-		if($this->urls === null) $this->loadUrls();
-
-		// Generate and save the processed URLs
-		$this->urls['inferred'][$inferredURL] = $inferredURL;
-
-		// Trigger parent URL back-filling
-		$this->parentProcessedURL($inferredURL);
-	}
-
-	//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	
-	/**
-	 * Return true if the given URL exists
-	 * @param  string $url The URL, either absolute, or relative starting with "/"
-	 * @return boolean     Does the URL exist
-	 */
-	function hasURL($url) {
-		if($this->urls === null) $this->loadUrls();
-
-		// Try and relativise an absolute URL
-		if($url[0] != '/') {
-			$simpifiedURL = $this->simplifyURL($url);
-			$simpifiedBase = $this->simplifyURL($this->baseURL);
-
-			if(substr($simpifiedURL,0,strlen($simpifiedBase)) == $simpifiedBase) {
-				$url = substr($simpifiedURL, strlen($simpifiedBase));
-			} else {
-				throw new InvalidArgumentException("URL $url is not from the site $this->baseURL");
-			}
-		}
-
-		return isset($this->urls['regular'][$url]) || in_array($url, $this->urls['inferred']);
-	}
-
-	/**
-	 * Simplify a URL.
-	 * Ignores https/http differences and "www." / non differences.
-	 * 
-	 * @param  string $url
-	 * @return string
-	 */
-	protected function simplifyURL($url) {
-		return preg_replace('#^https?://(www\.)?#i','http://www.', $url);
-	}
-
-	/**
-	 * Returns true if the given URL is in the list of processed URls
-	 * 
-	 * @param  string  $processedURL The processed URL
-	 * @return boolean               True if it exists, false otherwise
-	 */
-	function hasProcessedURL($processedURL) {
-		if($this->urls === null) $this->loadUrls();
-
-		return in_array($processedURL, $this->urls['regular']) || in_array($processedURL, $this->urls['inferred']);
-
-	}
-
-	/**
-	 * Return the processed URL that is the parent of the given one.
-	 *
-	 * Both input and output are processed URLs
-	 * 
-	 * @param  string $url A relative URL
-	 * @return string      [description]
-	 */
-	function parentProcessedURL($processedURL) {
-		if($processedURL == "/") return "";
-
-		// URL heirachy can be broken down by querystring or by URL
-		$breakpoint = max(strrpos($processedURL, '?'), strrpos($processedURL,'/'));
-
-		// Special case for children of the root
-		if($breakpoint == 0) return "/";
-
-		// Get parent URL
-		$parentProcessedURL = substr($processedURL,0,$breakpoint);
-
-		// If an intermediary URL doesn't exist, create it
-		if(!$this->hasProcessedURL($parentProcessedURL)) $this->addInferredURL($parentProcessedURL);
-
-		return $parentProcessedURL;
-	}
-
-	/**
-	 * Return the regular URL, given the processed one.
-	 *
-	 * Note that the URL processing isn't reversible, so this function works looks by iterating through all URLs.
-	 * If the URL doesn't exist in the list, this function returns null.
-	 * 
-	 * @param  string $processedURL The URL after processing has been applied.
-	 * @return string               The original URL.
-	 */
-	function unprocessedURL($processedURL) {
-		if($url = array_search($processedURL, $this->urls['regular'])) {
-			return $url;
-		
-		} else if(in_array($processedURL, $this->urls['inferred'])) {
-			return $processedURL;
-		} else {
-			return null;
-		}
-	}
-
-	/**
-	 * Find the processed URL in the URL list
-	 * @param  [type] $url [description]
-	 * @return [type]      [description]
-	 */
-	function processedURL($url) {
-		if($this->urls === null) $this->loadUrls();
-
-		if(isset($this->urls['regular'][$url])) {
-			// Generate it if missing
-			if($this->urls['regular'][$url] === true) $this->urls['regular'][$url] = $this->generateProcessedURL($url);
-			return $this->urls['regular'][$url];
-		
-		} elseif(in_array($url, $this->urls['inferred'])) {
-			return $url;
-		}
-	}
-
-	/**
-	 * Execute custom logic for processing URLs prior to heirachy generation.
-	 *
-	 * This can be used to implement logic such as ignoring the "/Pages/" parts of MOSS URLs, or dropping extensions.
-	 * 
-	 * @param  string $url The unprocessed URL
-	 * @return string      The processed URL
-	 */
-	function generateProcessedURL($url) {
-		if(!$url) throw new LogicException("Can't pass a blank URL to generateProcessedURL");
-		if($this->urlProcessor) $url = $this->urlProcessor->processURL($url);
-		if(!$url) throw new LogicException(get_class($this->urlProcessor) . " returned a blank URL.");
-		return $url;
-	}
-
-	/**
-	 * Return the URLs that are a child of the given URL
-	 * @param  [type] $url [description]
-	 * @return [type]      [description]
-	 */
-	function getChildren($url) {
-		if($this->urls === null) $this->loadUrls();
-
-		$processedURL = $this->processedURL($url);
-
-		// Subtly different regex if the URL ends in ? or /
-		if(preg_match('#[/?]$#',$processedURL)) $regEx = '#^'.preg_quote($processedURL,'#') . '[^/?]+$#';
-		else $regEx = '#^'.preg_quote($processedURL,'#') . '[/?][^/?]+$#';
-
-		$children = array();
-		foreach($this->urls['regular'] as $potentialChild => $potentialProcessedChild) {
-			if(preg_match($regEx, $potentialProcessedChild)) {
-				if(!isset($children[$potentialProcessedChild])) {
-					$children[$potentialProcessedChild] = $potentialChild;
-				}
-			}
-		}
-		foreach($this->urls['inferred'] as $potentialProcessedChild) {
-			if(preg_match($regEx, $potentialProcessedChild)) {
-				if(!isset($children[$potentialProcessedChild])) {
-					$children[$potentialProcessedChild] = $potentialProcessedChild;
-				}
-			}
-		}
-
-		return array_values($children);
-	}
+class StaticSiteUrlList
+{
+    protected $baseURL, $cacheDir;
+
+    /**
+     * Two element array: contains keys 'inferred' and 'regular':
+     *  - 'regular' is an array mapping raw URLs to processed URLs
+     *  - 'inferred' is an array of inferred URLs
+     */
+    protected $urls = null;
+
+    protected $autoCrawl = false;
+
+    protected $urlProcessor = null;
+
+    protected $extraCrawlURLs = null;
+
+    /**
+     * A list of regular expression patterns to exclude from scraping
+     *
+     * @var array
+     */
+    protected $excludePatterns = array();
+
+    /**
+     * Create a new URL List.
+     *
+     * Both arguments are normalised before being stored on the instance.
+     *
+     * @param string $baseURL  The Base URL to find links on; one trailing "/" is dropped
+     * @param string $cacheDir The local path to cache data into; a trailing "/" is ensured
+     */
+    public function __construct($baseURL, $cacheDir)
+    {
+        $baseHasSlash = substr($baseURL, -1) == "/";
+        $cacheHasSlash = substr($cacheDir, -1) == "/";
+
+        // baseURL is kept without its trailing slash
+        $this->baseURL = $baseHasSlash ? substr($baseURL, 0, -1) : $baseURL;
+
+        // cacheDir is kept with a trailing slash
+        $this->cacheDir = $cacheHasSlash ? $cacheDir : $cacheDir . "/";
+    }
+
+    /**
+     * Set a URL processor for this URL List.
+     *
+     * URL processors process the URLs before the site hierarchy and inferred meta-data are generated.
+     * These can be used to transform URLs from CMSes that don't provide a natural hierarchy into something
+     * more useful.
+     *
+     * See {@link StaticSiteMOSSURLProcessor} for an example.
+     *
+     * @param StaticSiteUrlProcessor $urlProcessor Processor that generateProcessedURL() applies to each URL
+     */
+    public function setUrlProcessor(StaticSiteUrlProcessor $urlProcessor)
+    {
+        $this->urlProcessor = $urlProcessor;
+    }
+
+    /**
+     * Define additional crawl URLs. Each of these URLs will be crawled in addition to the base URL,
+     * which can be helpful if pages are getting missed by the crawl.
+     * @param array $extraCrawlURLs The extra URLs to crawl
+     */
+    public function setExtraCrawlURls($extraCrawlURLs)
+    {
+        $this->extraCrawlURLs = $extraCrawlURLs;
+    }
+
+    /**
+     * Return the additional crawl URLs as an array (null until some have been set)
+     */
+    public function getExtraCrawlURLs()
+    {
+        return $this->extraCrawlURLs;
+    }
+
+    /**
+     * Set an array of regular expression patterns that should be excluded from
+     * being added to the url list
+     *
+     * @param array $excludePatterns Patterns without delimiters; StaticSiteCrawler wraps each one in "|"
+     */
+    public function setExcludePatterns(array $excludePatterns)
+    {
+        $this->excludePatterns = $excludePatterns;
+    }
+
+    /**
+     * Get an array of regular expression patterns that should not be added to
+     * the url list
+     *
+     * @return array The patterns given to setExcludePatterns(); empty by default
+     */
+    public function getExcludePatterns()
+    {
+        return $this->excludePatterns;
+    }
+
+    /**
+     * Set whether the crawl should be triggered on demand.
+     *
+     * @param bool $autoCrawl True to let loadUrls() start a crawl when no completed crawl is cached
+     */
+    public function setAutoCrawl($autoCrawl)
+    {
+        $this->autoCrawl = $autoCrawl;
+    }
+
+    /**
+     * Returns the status of the spidering: "Complete", "Partial", or "Not started"
+     * @return string Status derived from which cache files currently exist
+     */
+    public function getSpiderStatus()
+    {
+        // Without a cached URL file, no crawl has produced anything yet
+        if (!file_exists($this->cacheDir . 'urls')) {
+            return "Not started";
+        }
+
+        // The crawler ID file is written when a crawl starts and removed when it finishes
+        $crawlPending = file_exists($this->cacheDir . 'crawlerid');
+
+        return $crawlPending ? "Partial" : "Complete";
+    }
+
+    /**
+     * Return the number of URLs crawled so far: distinct processed URLs plus inferred URLs.
+     * @return int|null Null when nothing is in memory and no URL cache file exists
+     */
+    public function getNumURLs()
+    {
+        $urls = $this->urls;
+        if (!$urls) {
+            // Don't rely on loadUrls() as it chokes on partially completed imports
+            if (!file_exists($this->cacheDir . 'urls')) {
+                return null;
+            }
+            $urls = unserialize(file_get_contents($this->cacheDir . 'urls'));
+        }
+        return count(array_unique($urls['regular'])) + count($urls['inferred']);
+    }
+
+    /**
+     * Return the raw URLs, i.e. the keys of the getProcessedURLs() map.
+     * @return array|null Null when getProcessedURLs() yields nothing
+     */
+    public function getRawURLs()
+    {
+        $processed = $this->getProcessedURLs();
+
+        return $processed ? array_keys($processed) : null;
+    }
+
+    /**
+     * Return a map of URLs crawled, with raw URLs as keys and processed URLs as values
+     * @return array|null Null when no crawl has completed and autoCrawl is off
+     */
+    public function getProcessedURLs()
+    {
+        if (!$this->hasCrawled() && !$this->autoCrawl) {
+            return null;
+        }
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+        // Inferred URLs have no raw form, so each one acts as its own key
+        $inferred = $this->urls['inferred'];
+        return array_merge($this->urls['regular'], $inferred ? array_combine($inferred, $inferred) : array());
+    }
+
+    public function hasCrawled()
+    {
+        // True once a crawl has finished: the URL cache exists and no crawler ID file marks a crawl in progress
+        return file_exists($this->cacheDir . 'urls') && !file_exists($this->cacheDir . 'crawlerid');
+    }
+
+    /**
+     * Load the URLs, either by fetching from cache, or by crawling when autoCrawl is enabled
+     * @return void
+     * @throws LogicException When no completed crawl is cached and autoCrawl is off
+     */
+    public function loadUrls()
+    {
+        if (!$this->hasCrawled()) {
+            if (!$this->autoCrawl) {
+                throw new LogicException("Crawl hasn't been executed yet, and autoCrawl is set to false");
+            }
+            $this->crawl();
+            return;
+        }
+        $cached = unserialize(file_get_contents($this->cacheDir . 'urls'));
+        $blank = array('regular' => array(), 'inferred' => array()); // replaces an obsolete cache format
+        $this->urls = isset($cached['regular'], $cached['inferred']) ? $cached : $blank;
+    }
+
+    /**
+     * Re-execute the URL processor on all the fetched URLs, rebuild the inferred
+     * parent URLs from the new results, and save the list to disk.
+     * @return void
+     */
+    public function reprocessUrls()
+    {
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+
+        // Inferred URLs are derived data: drop them and let the back-filling below recreate them
+        $this->urls['inferred'] = array();
+
+        // The processing may have changed since the last crawl, so redo every raw URL
+        foreach (array_keys($this->urls['regular']) as $rawURL) {
+            $reprocessed = $this->generateProcessedURL($rawURL);
+            $this->urls['regular'][$rawURL] = $reprocessed;
+            // Back-fill any missing parents of the new processed URL
+            $this->parentProcessedURL($reprocessed);
+        }
+
+        $this->saveURLs();
+    }
+
+    /**
+     * Crawl the site from the base URL, resuming an unfinished crawl if one exists, then save the sorted list.
+     * @param int|bool $limit Page limit handed to the crawler; false for no limit
+     * @param bool $verbose Whether the crawler echoes its progress
+     * @return \StaticSiteCrawler
+     */
+    public function crawl($limit = false, $verbose = false)
+    {
+        increase_time_limit_to(3600);
+
+        if (!is_dir($this->cacheDir)) {
+            mkdir($this->cacheDir);
+        }
+
+        $crawler = new StaticSiteCrawler($this, $limit, $verbose);
+        $crawler->enableResumption();
+        $crawler->setUrlCacheType(PHPCrawlerUrlCacheTypes::URLCACHE_SQLITE);
+        $crawler->setWorkingDirectory($this->cacheDir);
+
+        // Allow for resuming an incomplete crawl
+        if (file_exists($this->cacheDir . 'crawlerid')) {
+            // We should re-load the partial list of URLs, if relevant
+            // This should only happen when we are resuming a partial crawl
+            if (file_exists($this->cacheDir . 'urls')) {
+                $this->urls = unserialize(file_get_contents($this->cacheDir . 'urls'));
+            } else {
+                $this->urls = array('regular' => array(), 'inferred' => array());
+            }
+            $crawlerID = file_get_contents($this->cacheDir . 'crawlerid');
+            $crawler->resume($crawlerID);
+        } else {
+            $crawlerID = $crawler->getCrawlerId();
+            // cacheDir already ends in "/" (see the constructor), so no extra separator is needed
+            file_put_contents($this->cacheDir . 'crawlerid', $crawlerID);
+            $this->urls = array('regular' => array(), 'inferred' => array());
+        }
+
+        $crawler->setURL($this->baseURL);
+        $crawler->go();
+
+        // The crawl ran to completion: remove the marker that flags it as resumable
+        unlink($this->cacheDir . 'crawlerid');
+        ksort($this->urls['regular']);
+        ksort($this->urls['inferred']);
+        $this->saveURLs();
+        return $crawler;
+    }
+
+    /**
+     * Save the current list of URLs to disk, serialized into the "urls" file of the cache directory
+     * @return void
+     */
+    public function saveURLs()
+    {
+        file_put_contents($this->cacheDir . 'urls', serialize($this->urls));
+    }
 
+    /**
+     * Add a URL to this list, given the absolute URL
+     * @param string $url The absolute URL; scheme and "www." may differ from the base URL
+     * @throws InvalidArgumentException If the URL does not start with the base URL
+     */
+    public function addAbsoluteURL($url)
+    {
+        $simplifiedURL = $this->simplifyURL($url);
+        $simplifiedBase = $this->simplifyURL($this->baseURL);
+
+        if (substr($simplifiedURL, 0, strlen($simplifiedBase)) !== $simplifiedBase) {
+            throw new InvalidArgumentException("URL $url is not from the site $this->baseURL");
+        }
+
+        // Cut the prefix from the simplified URL: the raw URL's prefix can have a different length
+        return $this->addURL(substr($simplifiedURL, strlen($simplifiedBase)));
+    }
+
+    public function addURL($url)
+    {
+        // Records a raw (unprocessed) URL together with its processed form
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+        $processed = $this->generateProcessedURL($url);
+        $this->urls['regular'][$url] = $processed;
+
+        // Trigger parent URL back-filling
+        $this->parentProcessedURL($processed);
+    }
+
+
+    /**
+     * Add an inferred URL to the list.
+     *
+     * Since the unprocessed URL isn't available, we use the processed URL in its place.  This should be used with
+     * some caution.
+     *
+     * @param string $inferredURL The processed URL to add.
+     */
+    public function addInferredURL($inferredURL)
+    {
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+
+        // Store the URL under itself as key, as there is no raw URL to key it by
+        $this->urls['inferred'][$inferredURL] = $inferredURL;
+
+        // Trigger parent URL back-filling
+        $this->parentProcessedURL($inferredURL);
+    }
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Return true if the given URL is in the list, as a raw URL or as an inferred one
+     * @param  string $url The URL, either absolute, or relative starting with "/"
+     * @return boolean     Does the URL exist
+     * @throws InvalidArgumentException If an absolute URL does not start with the base URL
+     */
+    public function hasURL($url)
+    {
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+
+        // Anything not starting with "/" is treated as absolute and relativised against the base URL
+        if ($url[0] != '/') {
+            $fullURL = $this->simplifyURL($url);
+            $basePrefix = $this->simplifyURL($this->baseURL);
+            $prefixLength = strlen($basePrefix);
+            if (substr($fullURL, 0, $prefixLength) != $basePrefix) {
+                throw new InvalidArgumentException("URL $url is not from the site $this->baseURL");
+            }
+            $url = substr($fullURL, $prefixLength);
+        }
+
+        return isset($this->urls['regular'][$url]) || in_array($url, $this->urls['inferred']);
+    }
+
+    /**
+     * Simplify a URL so that http/https and "www." / non-"www." variants compare equal:
+     * a leading "http://" or "https://", with or without "www.", becomes "http://www.".
+     *
+     * @param  string $url The URL to simplify
+     * @return string      The simplified URL; unchanged if it has no http(s) prefix
+     */
+    protected function simplifyURL($url)
+    {
+        return preg_replace('#^https?://(www\.)?#i', 'http://www.', $url);
+    }
+
+    /**
+     * Returns true if the given URL is in the list of processed URLs, regular or inferred
+     *
+     * @param  string  $processedURL The processed URL
+     * @return boolean               True if it exists, false otherwise
+     */
+    public function hasProcessedURL($processedURL)
+    {
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+        $isRegular = in_array($processedURL, $this->urls['regular']);
+        return $isRegular || in_array($processedURL, $this->urls['inferred']);
+    }
+
+    /**
+     * Return the processed URL that is the parent of the given one.
+     *
+     * Both input and output are processed URLs. A parent missing from the list is added as an inferred URL.
+     *
+     * @param  string $processedURL A relative, processed URL
+     * @return string               The parent URL: "" for "/", and "/" for direct children of the root
+     */
+    public function parentProcessedURL($processedURL)
+    {
+        if ($processedURL == "/") {
+            return "";
+        }
+
+        // URL hierarchy can be broken down by querystring or by path: split at the last "?" or "/"
+        $breakpoint = max(strrpos($processedURL, '?'), strrpos($processedURL, '/'));
+
+        // Special case for children of the root (also taken when neither character occurs, as false == 0)
+        if ($breakpoint == 0) {
+            return "/";
+        }
+
+        // Get parent URL
+        $parentProcessedURL = substr($processedURL, 0, $breakpoint);
+
+        // If an intermediary URL doesn't exist, create it (which back-fills its own parents in turn)
+        if (!$this->hasProcessedURL($parentProcessedURL)) {
+            $this->addInferredURL($parentProcessedURL);
+        }
+
+        return $parentProcessedURL;
+    }
+
+    /**
+     * Return the regular URL, given the processed one.
+     *
+     * Note that the URL processing isn't reversible, so this function works by searching through all URLs.
+     * If the URL doesn't exist in the list, this function returns null.
+     *
+     * @param  string $processedURL The URL after processing has been applied.
+     * @return string|null          The original URL (the processed URL itself for inferred URLs), or null.
+     */
+    public function unprocessedURL($processedURL)
+    {
+        // array_search() returns the matching key, which may itself be falsy: compare against false
+        $url = array_search($processedURL, $this->urls['regular']);
+        if ($url !== false) {
+            return $url;
+        }
+
+        return in_array($processedURL, $this->urls['inferred']) ? $processedURL : null;
+    }
+
+    /**
+     * Find the processed URL in the URL list
+     * @param  string $url A raw URL from the list, or an inferred URL
+     * @return string|null The processed URL (an inferred URL maps to itself), or null if the URL is unknown
+     */
+    public function processedURL($url)
+    {
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+
+        if (!isset($this->urls['regular'][$url])) {
+            // Not a raw URL: an inferred URL is its own processed form
+            return in_array($url, $this->urls['inferred']) ? $url : null;
+        }
+        // A value of true is a placeholder: generate the processed URL on first use
+        if ($this->urls['regular'][$url] === true) {
+            $this->urls['regular'][$url] = $this->generateProcessedURL($url);
+        }
+        return $this->urls['regular'][$url];
+    }
+
+    /**
+     * Execute custom logic for processing URLs prior to hierarchy generation.
+     * This can be used to implement logic such as ignoring the "/Pages/" parts of MOSS URLs, or dropping extensions.
+     * @param  string $url The unprocessed URL
+     * @return string      The processed URL (the URL as given when no URL processor is set)
+     * @throws LogicException If the URL is blank, either as passed in or as returned by the processor
+     */
+    public function generateProcessedURL($url)
+    {
+        if (!$url) {
+            throw new LogicException("Can't pass a blank URL to generateProcessedURL");
+        }
+        if (!$this->urlProcessor) {
+            return $url;
+        }
+        $processed = $this->urlProcessor->processURL($url);
+        if (!$processed) {
+            throw new LogicException(get_class($this->urlProcessor) . " returned a blank URL.");
+        }
+        return $processed;
+    }
+
+    /**
+     * Return the URLs that are a direct child of the given URL: processed URLs that extend the
+     * parent's processed URL by exactly one "/" or "?" delimited segment.
+     *
+     * @param  string $url The parent URL, as accepted by processedURL()
+     * @return array       Raw URLs of the children (inferred children appear as their processed URL),
+     *                     one per distinct processed URL
+     */
+    public function getChildren($url)
+    {
+        if ($this->urls === null) {
+            $this->loadUrls();
+        }
+
+        $parent = $this->processedURL($url);
+
+        // A parent already ending in "/" or "?" supplies the separator itself
+        $separator = preg_match('#[/?]$#', $parent) ? '' : '[/?]';
+        $childPattern = '#^' . preg_quote($parent, '#') . $separator . '[^/?]+$#';
+
+        $found = array();
+
+        // Regular URLs go first, so a raw URL wins over an inferred one with the same processed URL
+        foreach ($this->urls['regular'] as $raw => $processed) {
+            if (preg_match($childPattern, $processed) && !isset($found[$processed])) {
+                $found[$processed] = $raw;
+            }
+        }
+
+        // Inferred URLs have no raw form and stand in for themselves
+        foreach ($this->urls['inferred'] as $inferred) {
+            if (preg_match($childPattern, $inferred) && !isset($found[$inferred])) {
+                $found[$inferred] = $inferred;
+            }
+        }
+
+        return array_values($found);
+    }
 }
 
-class StaticSiteCrawler extends PHPCrawler {
-	protected $urlList;
-
-	/**
-	 *
-	 * @var bool
-	 */
-	protected $verbose = false;
-
-	function __construct(StaticSiteUrlList $urlList, $limit=false, $verbose=false) {
-		parent::__construct();
-		$this->urlList = $urlList;
-		$this->verbose = $verbose;
-		if($limit) {
-			$this->setPageLimit($limit);
-		}
-	}
-
-	function handleHeaderInfo(PHPCrawlerResponseHeader $header) {
-		// Don't parse 400/500 responses
-		if($header->http_status_code > 399) {
-			$message = $header->source_url . " - skipped as it's $header->http_status_code".PHP_EOL;
-			error_log($message, 3, '/tmp/urls');
-			if($this->verbose) {
-				echo "[!] ".$message;
-			}
-			return -1;
-		}
-	}
-
-	function handleDocumentInfo(PHPCrawlerDocumentInfo $info) {
-		// Ignore errors and redirects
-		if($info->http_status_code < 200) return;
-		if($info->http_status_code > 299) return;
-
-		// Ignore non HTML
-		if(!preg_match('#/x?html#', $info->content_type)) return;
-
-		$this->urlList->addAbsoluteURL($info->url);
-		if($this->verbose) {
-			echo "[+] ".$info->url.PHP_EOL;
-		}
-		$this->urlList->saveURLs();
-	}
-
-	protected function initCrawlerProcess() {
-		parent::initCrawlerProcess();
-
-		// Add additional URLs to crawl to the crawler's LinkCache
-		// NOTE: This is using an undocumented API
-		if($extraURLs = $this->urlList->getExtraCrawlURLs()) {
-			foreach($extraURLs as $extraURL) {
-    			$this->LinkCache->addUrl(new PHPCrawlerURLDescriptor($extraURL));
-    		}
-    	}
-
-		// Prevent URLs that matches the exclude patterns to be fetched
-		if($excludePatterns = $this->urlList->getExcludePatterns()) {
-			foreach($excludePatterns as $pattern) {
-				$validRegExp = $this->addURLFilterRule('|'.str_replace('|', '\|', $pattern).'|');
-
-				if(!$validRegExp) {
-					throw new InvalidArgumentException('Exclude url pattern "'.$pattern.'" is not a valid regular expression.');
-				}
-			}
-		}
+class StaticSiteCrawler extends PHPCrawler
+{
+    protected $urlList;
+
+    /**
+     *
+     * @var bool
+     */
+    protected $verbose = false;
+
+    public function __construct(StaticSiteUrlList $urlList, $limit=false, $verbose=false)
+    {
+        parent::__construct();
+        $this->urlList = $urlList; // the list that crawled pages are reported to
+        $this->verbose = $verbose; // when true, progress lines are echoed
+        if ($limit) { // setPageLimit() is only called for a truthy limit
+            $this->setPageLimit($limit);
+        }
     }
-}
\ No newline at end of file
+
+    public function handleHeaderInfo(PHPCrawlerResponseHeader $header)
+    {
+        if ($header->http_status_code <= 399) {
+            return; // only 400/500 responses are logged and skipped
+        }
+        $message = $header->source_url . " - skipped as it's $header->http_status_code".PHP_EOL;
+        error_log($message, 3, '/tmp/urls');
+        if ($this->verbose) {
+            echo "[!] ".$message;
+        }
+        return -1; // NOTE(review): presumably tells PHPCrawl not to parse the response - confirm
+    }
+
+    public function handleDocumentInfo(PHPCrawlerDocumentInfo $info)
+    {
+        // Only 2xx responses are recorded: ignore errors and redirects
+        $status = $info->http_status_code;
+        if ($status < 200 || $status > 299) {
+            return;
+        }
+
+        // Ignore non HTML (the content type must contain "/html" or "/xhtml")
+        $isHtml = preg_match('#/x?html#', $info->content_type);
+        if (!$isHtml) {
+            return;
+        }
+
+        // Record the page and write the list to disk straight away, one save per accepted page
+        $this->urlList->addAbsoluteURL($info->url);
+        if ($this->verbose) {
+            echo "[+] ".$info->url.PHP_EOL;
+        }
+        $this->urlList->saveURLs();
+    }
+
+    protected function initCrawlerProcess()
+    {
+        parent::initCrawlerProcess();
+
+        // Add the URL list's extra crawl URLs to the crawler's LinkCache, after the parent has initialised
+        // NOTE: This is using an undocumented API
+        if ($extraURLs = $this->urlList->getExtraCrawlURLs()) {
+            foreach ($extraURLs as $extraURL) {
+                $this->LinkCache->addUrl(new PHPCrawlerURLDescriptor($extraURL));
+            }
+        }
+
+        // Prevent URLs that match the exclude patterns from being fetched ("|" is the delimiter, so it is escaped)
+        if ($excludePatterns = $this->urlList->getExcludePatterns()) {
+            foreach ($excludePatterns as $pattern) {
+                $validRegExp = $this->addURLFilterRule('|'.str_replace('|', '\|', $pattern).'|');
+
+                if (!$validRegExp) {
+                    throw new InvalidArgumentException('Exclude url pattern "'.$pattern.'" is not a valid regular expression.');
+                }
+            }
+        }
+    }
+}
diff --git a/code/StaticSiteUrlProcessor.php b/code/StaticSiteUrlProcessor.php
index f762bf1..4b443f1 100644
--- a/code/StaticSiteUrlProcessor.php
+++ b/code/StaticSiteUrlProcessor.php
@@ -13,75 +13,88 @@
  *
  * More sophisticated processing might be done to facilitate importing of less 
  */
-interface StaticSiteUrlProcessor {
+interface StaticSiteUrlProcessor
+{
 
-	/**
-	 * Return a name for the style of URLs to be processed.
-	 * 
-	 * This name will be shown in the CMS when users are configuring the content import.
-	 * 
-	 * @return string The name, in plaintext (no HTML)
-	 */
-	function getName();
+    /**
+     * Return a name for the style of URLs to be processed.
+     * 
+     * This name will be shown in the CMS when users are configuring the content import.
+     * 
+     * @return string The name, in plaintext (no HTML)
+     */
+    public function getName();
 
-	/**
-	 * Return an explanation of what processing is done.
-	 * 
-	 * This explanation will be shown in the CMS when users are configuring the content import.
-	 * 
-	 * @return string The description, in plaintext (no HTML)
-	 */
-	function getDescription();
+    /**
+     * Return an explanation of what processing is done.
+     * 
+     * This explanation will be shown in the CMS when users are configuring the content import.
+     * 
+     * @return string The description, in plaintext (no HTML)
+     */
+    public function getDescription();
 
 
-	/**
-	 * Return a description for this processor, to be shown in the CMS.
-	 * @param string $url The unprocessed URL
-	 * @return string The name
-	 */
-	function processURL($url);
+    /**
+     * Process a URL and return the rewritten version.
+     * @param string $url The unprocessed URL
+     * @return string The processed URL
+     */
+    public function processURL($url);
 }
 
 /**
  * Processor for MOSS URLs
  */
-class StaticSiteURLProcessor_DropExtensions implements StaticSiteUrlProcessor {
-	function getName() {
-		return "Simple clean-up (recommended)";
-	}
+class StaticSiteURLProcessor_DropExtensions implements StaticSiteUrlProcessor
+{
+    public function getName()
+    {
+        return "Simple clean-up (recommended)";
+    }
 
-	function getDescription() {
-		return "Drop file extensions and trailing slashes on URLs but otherwise leave them the same";
-	}
+    public function getDescription()
+    {
+        return "Drop file extensions and trailing slashes on URLs but otherwise leave them the same";
+    }
 
-	function processURL($url) {
-		if(preg_match('/^([^?]*)\?(.*)$/', $url, $matches)) {
-			$url = $matches[1];
-			$qs = $matches[2];
-			if($url != '/') $url = preg_replace('#/$#','',$url);
-			$url = preg_replace('#\.[^.]*$#','',$url);
-			return "$url?$qs";
-		} else {
-			if($url != '/') $url = preg_replace('#/$#','',$url);
-			$url = preg_replace('#\.[^.]*$#','',$url);
-			return $url;
-		}
-	}
+    public function processURL($url)
+    {
+        if (preg_match('/^([^?]*)\?(.*)$/', $url, $matches)) {
+            $url = $matches[1];
+            $qs = $matches[2];
+            if ($url != '/') {
+                $url = preg_replace('#/$#', '', $url);
+            }
+            $url = preg_replace('#\.[^.]*$#', '', $url);
+            return "$url?$qs";
+        } else {
+            if ($url != '/') {
+                $url = preg_replace('#/$#', '', $url);
+            }
+            $url = preg_replace('#\.[^.]*$#', '', $url);
+            return $url;
+        }
+    }
 }
 /**
  * Processor for MOSS URLs
  */
-class StaticSiteMOSSURLProcessor extends StaticSiteURLProcessor_DropExtensions implements StaticSiteUrlProcessor {
-	function getName() {
-		return "MOSS-style URLs";
-	}
+class StaticSiteMOSSURLProcessor extends StaticSiteURLProcessor_DropExtensions implements StaticSiteUrlProcessor
+{
+    public function getName()
+    {
+        return "MOSS-style URLs";
+    }
 
-	function getDescription() {
-		return "Remove '/Pages/' from the URL, and drop extensions";
-	}
+    public function getDescription()
+    {
+        return "Remove '/Pages/' from the URL, and drop extensions";
+    }
 
-	function processURL($url) {
-		$url = str_ireplace('/Pages/','/',$url);
-		return parent::processURL($url);
-	}
+    public function processURL($url)
+    {
+        $url = str_ireplace('/Pages/', '/', $url);
+        return parent::processURL($url);
+    }
 }
diff --git a/code/tasks/ExternalContentImportContentTask.php b/code/tasks/ExternalContentImportContentTask.php
index 3987544..5e06c3d 100644
--- a/code/tasks/ExternalContentImportContentTask.php
+++ b/code/tasks/ExternalContentImportContentTask.php
@@ -3,35 +3,37 @@
 /**
  * External content - run import as a build task, importing content into a new container
  */
-class ExternalContentImportContentTask extends BuildTask {
+class ExternalContentImportContentTask extends BuildTask
+{
 
-	function run($request) {
-		$id = $request->getVar('ID');
-		if((!is_numeric($id) && !preg_match('/^[0-9]+_[0-9]+$/', $id)) || !$id) {
-			echo "<p>Specify ?ID=(number) or ?ID=(ID)_(Code)</p>\n";
-			return;
-		}
+    public function run($request)
+    {
+        $id = $request->getVar('ID');
+        if ((!is_numeric($id) && !preg_match('/^[0-9]+_[0-9]+$/', $id)) || !$id) {
+            echo "<p>Specify ?ID=(number) or ?ID=(ID)_(Code)</p>\n";
+            return;
+        }
 
-		$includeSelected 		= false;
-		$includeChildren 		= true;
-		$duplicates 			= 'Duplicate';
-		$selected 				= $id;
+        $includeSelected        = false;
+        $includeChildren        = true;
+        $duplicates            = 'Duplicate';
+        $selected                = $id;
 
-		$target = new Page;
-		$target->Title = "Import on " . date('Y-m-d H:i:s');
-		$target->write();
-		$targetType = 'SiteTree';
+        $target = new Page;
+        $target->Title = "Import on " . date('Y-m-d H:i:s');
+        $target->write();
+        $targetType = 'SiteTree';
 
-		$from = ExternalContent::getDataObjectFor($selected);
-		if ($from instanceof ExternalContentSource) {
-			$selected = false;
-		}
+        $from = ExternalContent::getDataObjectFor($selected);
+        if ($from instanceof ExternalContentSource) {
+            $selected = false;
+        }
 
-		$importer = null;
-		$importer = $from->getContentImporter($targetType);
+        $importer = null;
+        $importer = $from->getContentImporter($targetType);
 
-		if ($importer) {
-			$importer->import($from, $target, $includeSelected, $includeChildren, $duplicates);
-		}
-	}
+        if ($importer) {
+            $importer->import($from, $target, $includeSelected, $includeChildren, $duplicates);
+        }
+    }
 }
diff --git a/code/tasks/StaticSiteCrawlURLsTask.php b/code/tasks/StaticSiteCrawlURLsTask.php
index 1366102..eeceb69 100644
--- a/code/tasks/StaticSiteCrawlURLsTask.php
+++ b/code/tasks/StaticSiteCrawlURLsTask.php
@@ -4,17 +4,18 @@
  * StaticSiteCrawlURLs
  *
  */
-class StaticSiteCrawlURLsTask extends BuildTask {
-
-	function run($request) {
-		$id = $request->getVar('ID');
-		if(!is_numeric($id) || !$id) {
-			echo "<p>Specify ?ID=(number)</p>";
-			return;
-		}
-		// Find all pages
-		$contentSource = StaticSiteContentSource::get()->byID($id);
-		$contentSource->urllist()->crawl(false, true);
-	}
+class StaticSiteCrawlURLsTask extends BuildTask
+{
 
+    public function run($request)
+    {
+        $id = $request->getVar('ID');
+        if (!is_numeric($id) || !$id) {
+            echo "<p>Specify ?ID=(number)</p>";
+            return;
+        }
+        // Look up the content source and crawl its URLs
+        $contentSource = StaticSiteContentSource::get()->byID($id);
+        $contentSource->urllist()->crawl(false, true);
+    }
 }
diff --git a/code/tasks/StaticSiteRewriteLinksTask.php b/code/tasks/StaticSiteRewriteLinksTask.php
index c91c902..1089fce 100644
--- a/code/tasks/StaticSiteRewriteLinksTask.php
+++ b/code/tasks/StaticSiteRewriteLinksTask.php
@@ -3,70 +3,71 @@
 /**
  * Rewrite all links in content imported via staticsiteimporter
  */
-class StaticSiteRewriteLinksTask extends BuildTask {
-	
-	function run($request) {
-		$id = $request->getVar('ID');
-		if(!is_numeric($id) || !$id) {
-			echo "<p>Specify ?ID=(number)</p>";
-			return;
-		}
+class StaticSiteRewriteLinksTask extends BuildTask
+{
+    
+    public function run($request)
+    {
+        $id = $request->getVar('ID');
+        if (!is_numeric($id) || !$id) {
+            echo "<p>Specify ?ID=(number)</p>";
+            return;
+        }
 
-		// Find all pages
-		$contentSource = StaticSiteContentSource::get()->byID($id);
-		$pages = $contentSource->Pages();
+        // Find all pages
+        $contentSource = StaticSiteContentSource::get()->byID($id);
+        $pages = $contentSource->Pages();
 
-		echo "<p>Looking through " . $pages->Count() . " pages</p>\n";
+        echo "<p>Looking through " . $pages->Count() . " pages</p>\n";
 
-		// Set up rewriter
-		$pageLookup = $pages->map('StaticSiteURL', 'ID');
-		$baseURL = $contentSource->BaseUrl;
+        // Set up rewriter
+        $pageLookup = $pages->map('StaticSiteURL', 'ID');
+        $baseURL = $contentSource->BaseUrl;
 
-		$rewriter = new StaticSiteLinkRewriter(function($url) use($pageLookup, $baseURL) {
-			$fragment = "";
-			if(strpos($url,'#') !== false) {
-				list($url,$fragment) = explode('#', $url, 2);
-				$fragment = '#'.$fragment;
-			}
+        $rewriter = new StaticSiteLinkRewriter(function ($url) use ($pageLookup, $baseURL) {
+            $fragment = "";
+            if (strpos($url, '#') !== false) {
+                list($url, $fragment) = explode('#', $url, 2);
+                $fragment = '#'.$fragment;
+            }
 
-			if($pageLookup[$url]) {
-				return '[sitetree_link,id='.$pageLookup[$url] .']' . $fragment;
-			
-			} else {
-				if(substr($url,0,strlen($baseURL)) == $baseURL) {
-					echo "<p>WARNING: $url couldn't be rewritten.</p>\n";
-				}
-				return $url . $fragment;
-			}
-		});
+            if ($pageLookup[$url]) {
+                return '[sitetree_link,id='.$pageLookup[$url] .']' . $fragment;
+            } else {
+                if (substr($url, 0, strlen($baseURL)) == $baseURL) {
+                    echo "<p>WARNING: $url couldn't be rewritten.</p>\n";
+                }
+                return $url . $fragment;
+            }
+        });
 
-		// Perform rewriting
-		$changedFields = 0;
-		foreach($pages as $page) {
+        // Perform rewriting
+        $changedFields = 0;
+        foreach ($pages as $page) {
+            $schema = $contentSource->getSchemaForURL($page->URLSegment);
+            // Get fields to process
+            $fields = array();
+            foreach ($schema->ImportRules() as $rule) {
+                if (!$rule->PlainText) {
+                    $fields[] = $rule->FieldName;
+                }
+            }
+            $fields = array_unique($fields);
+            
 
-			$schema = $contentSource->getSchemaForURL($page->URLSegment);
-			// Get fields to process
-			$fields = array();
-			foreach($schema->ImportRules() as $rule) {
-				if(!$rule->PlainText) $fields[] = $rule->FieldName;
-			}
-			$fields = array_unique($fields);
-			
+            foreach ($fields as $field) {
+                $newContent = $rewriter->rewriteInContent($page->$field);
+                if ($newContent != $page->$field) {
+                    $newContent = str_replace(array('%5B', '%5D'), array('[', ']'), $newContent);
+                    $changedFields++;
 
-			foreach($fields as $field) {
-				$newContent = $rewriter->rewriteInContent($page->$field);
-				if($newContent != $page->$field) {
-					$newContent = str_replace(array('%5B','%5D'),array('[',']'),$newContent);
-					$changedFields++;
+                    echo "<p>Changed $field on $page->Title (#$page->ID).</p>";
+                    $page->$field = $newContent;
+                }
+            }
 
-					echo "<p>Changed $field on $page->Title (#$page->ID).</p>";
-					$page->$field = $newContent;
-				}
-			}
-
-			$page->write();
-		}
-		echo "<p>DONE. Amended $changedFields content fields.</p>".PHP_EOL;
-
-	} 
-}
\ No newline at end of file
+            $page->write();
+        }
+        echo "<p>DONE. Amended $changedFields content fields.</p>".PHP_EOL;
+    }
+}