Blob Blame History Raw
--- program/lib/html2text.php.orig	2008-08-30 07:35:36.000000000 -0500
+++ program/lib/html2text.php	2008-12-12 10:09:19.000000000 -0600
@@ -152,2 +151,0 @@
-        '/<h[123][^>]*>(.*?)<\/h[123]>/ie',      // H1 - H3
-        '/<h[456][^>]*>(.*?)<\/h[456]>/ie',      // H4 - H6
@@ -156,2 +153,0 @@
-        '/<b[^>]*>(.*?)<\/b>/ie',                // <b>
-        '/<strong[^>]*>(.*?)<\/strong>/ie',      // <strong>
@@ -164,2 +159,0 @@
-        '/<a [^>]*href=("|\')([^"\']+)\1[^>]*>(.*?)<\/a>/ie',
-                                                 // <a href="">
@@ -170 +163,0 @@
-        '/<th[^>]*>(.*?)<\/th>/ie',              // <th> and </th>
@@ -204,2 +196,0 @@
-        "strtoupper(\"\n\n\\1\n\n\")",          // H1 - H3
-        "ucwords(\"\n\n\\1\n\")",             // H4 - H6
@@ -208,2 +198,0 @@
-        'strtoupper("\\1")',                    // <b>
-        'strtoupper("\\1")',                    // <strong>
@@ -216,2 +204,0 @@
-    	'$this->_build_link_list("\\2", "\\3")',
-    	                                	// <a href="">
@@ -222 +208,0 @@
-        "strtoupper(\"\t\t\\1\n\")",            // <th> and </th>
@@ -235 +221 @@
-        '£',
+        '£',
@@ -240,0 +227,16 @@
+    /**
+     *  List of preg* regular expression patterns whose matches are rewritten
+     *  by _preg_callback(); capture group 1 is the tag name it dispatches on.
+     *
+     *  @var array $callback_search
+     *  @access public
+     */
+    var $callback_search = array(
+        '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H6
+        '/<(b)[^>]*>(.*?)<\/b>/i',                 // <b>
+        '/<(strong)[^>]*>(.*?)<\/strong>/i',       // <strong>
+        '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i',
+                                                   // <a href="">
+        '/<(th)[^>]*>(.*?)<\/th>/i',               // <th> and </th>
+    );
+
+
@@ -468 +470,4 @@
-	
+
+	// Replace known html entities
+	$text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
+
@@ -470,0 +476 @@
+        $text = preg_replace_callback($this->callback_search, array($this, '_preg_callback'), $text); // instance callback: _preg_callback() uses $this
@@ -546 +552 @@
-	    $text = preg_replace('/<pre[^>]*>.*<\/pre>/ismU', '<div><br>' . $result . '<br></div>', $text);
+	    $text = preg_replace('/<pre[^>]*>.*<\/pre>/ismU', '<div><br>' . $result . '<br></div>', $text, 1);
@@ -548,0 +555,38 @@
+
+    /**
+     *  Callback function for preg_replace_callback use.
+     *
+     *  @param  array PREG matches
+     *  @return string
+     *  @access private
+     */
+    function _preg_callback($matches)
+    {
+	switch($matches[1])
+	{
+	    case 'b':
+	    case 'strong':
+		return $this->_strtoupper($matches[2]);
+	    case 'hr':
+		return $this->_strtoupper("\t\t". $matches[2] ."\n");
+	    case 'h':
+		return $this->_strtoupper("\n\n". $matches[2] ."\n\n");
+	    case 'a':
+    	        return $this->_build_link_list($matches[3], $matches[4]);
+	}
+    }
+    
+    /**
+     *  Strtoupper multibyte wrapper function
+     *
+     *  @param  string
+     *  @return string
+     *  @access private
+     */
+    function _strtoupper($str)
+    {
+	if (function_exists('mb_strtoupper'))
+    	    return mb_strtoupper($str);
+    	else
+	    return strtoupper($str);
+    }