flash2html.class.php
test.php
Code:
<?php
/**
 * Transform the awful HTML of Flash into standardized HTML.
 *
 * The Flash markup is wrapped in a synthetic <root> element and fed to PHP's
 * XML parser with case folding enabled, so every tag and attribute name seen
 * by the handlers is upper-case. Each known tag is mapped to HTML either by a
 * plain tag transformation or by a start/end tag handler; unknown tags are
 * dropped while their text content is kept.
 *
 * Output is ISO-8859-1 compatible: characters above U+00FF are emitted as
 * numeric character references.
 */
class Flash2HTML
{
    /** XML parser used by the html() call in progress. */
    private $parser;

    /** Stack of entity definitions for the known tags currently open. */
    private $nodes = array();

    /** Map of upper-case source tag name => transformation settings. */
    private $entities = array();

    /** Output accumulated while parsing. */
    private $content = NULL;

    /** Optional user callback that replaces the default text handling. */
    private $dataHandler = NULL;

    /** When TRUE, tags are dropped and only text is emitted. */
    public $plainText = FALSE;

    /** When TRUE, mailto: addresses are rewritten as hexadecimal entities. */
    public $protectEmail = FALSE;

    /**
     * Registers the default tag transformations and handlers.
     *
     * Declared as __construct(): a method named after the class is no longer
     * treated as a constructor since PHP 8.0.
     */
    public function __construct()
    {
        $this->setTagTransformation('B', 'strong');
        $this->setTagTransformation('I', 'em');
        $this->setTagTransformation('LI', 'li');

        $this->setStartTagHandler('A', array($this, "a_startTagHandler"));
        $this->setStartTagHandler('FONT', array($this, "font_startTagHandler"));
        $this->setStartTagHandler('IMG', array($this, "img_startTagHandler"));
        $this->setStartTagHandler("P", array($this, "p_startTagHandler"));
        $this->setEndTagHandler("P", array($this, "p_endTagHandler"));
        $this->setStartTagHandler('TEXTFORMAT', array($this, "textformat_startTagHandler"));
        $this->setStartTagHandler('U', array($this, "u_startTagHandler"));
    }

    /**
     * Shortcut for html(), so the converter can be called like a function.
     *
     * @param String $data
     * @param Array $properties
     * @return String
     */
    public function __invoke($data, $properties = array())
    {
        return $this->html($data, $properties);
    }

    /**
     * Forgets a tag: it is no longer emitted, only its text content is kept.
     *
     * @param String $tag Upper-case source tag name.
     */
    public function ignoreTag($tag)
    {
        unset($this->entities[$tag]);
    }

    /**
     * Maps a source tag to an output tag, resetting any handler set for it.
     *
     * $to_tag is either a bare tag name ("strong") or a literal opening tag
     * with attributes ('<span class="x">', '<br class="x" />'). Any other
     * format is silently ignored.
     *
     * @param String $from_tag Upper-case source tag name.
     * @param String $to_tag
     */
    public function setTagTransformation($from_tag, $to_tag)
    {
        if (preg_match("/^\w+$/", $to_tag, $matches)) {
            $this->entities[$from_tag] = $this->createEntity($matches[0], NULL, FALSE);
        } else if (preg_match("/^<(\w+)\s+(.*)(\/?)>$/U", $to_tag, $matches)) {
            $this->entities[$from_tag] = $this->createEntity($matches[1], $matches[2], $matches[3] == "/");
        }
    }

    /**
     * Sets the callback that produces the opening markup of a tag.
     *
     * The callback receives ($parser, $tag, $attrs) and returns the markup
     * to emit. The tag is registered first if it is not known yet.
     *
     * @param String $tag Upper-case source tag name.
     * @param Function $start_tag_handler
     */
    public function setStartTagHandler($tag, $start_tag_handler)
    {
        if (!array_key_exists($tag, $this->entities)) {
            $this->setTagTransformation($tag, strtolower($tag));
        }
        $this->entities[$tag]["start_tag_handler"] = $start_tag_handler;
    }

    /**
     * Sets a callback that replaces the default handling of character data.
     *
     * The callback receives ($parser, $cdata) with entities already decoded
     * by the XML parser; its return value is emitted as is, so it is
     * responsible for any HTML escaping.
     *
     * @param Function $data_handler
     */
    public function setDataHandler($data_handler)
    {
        $this->dataHandler = $data_handler;
    }

    /**
     * Sets the callback that produces the closing markup of a tag.
     *
     * The callback receives ($parser, $tag) and returns the markup to emit.
     * The tag is registered first if it is not known yet.
     *
     * @param String $tag Upper-case source tag name.
     * @param Function $end_tag_handler
     */
    public function setEndTagHandler($tag, $end_tag_handler)
    {
        if (!array_key_exists($tag, $this->entities)) {
            $this->setTagTransformation($tag, strtolower($tag));
        }
        $this->entities[$tag]["end_tag_handler"] = $end_tag_handler;
    }

    /**
     * Clears the start tag handler of a known tag.
     *
     * @param String $tag Upper-case source tag name.
     */
    public function removeStartTagHandler($tag)
    {
        $this->clearHandler($tag, "start_tag_handler");
    }

    /**
     * Clears the per-tag data handler slot of a known tag.
     *
     * @param String $tag Upper-case source tag name.
     */
    public function removeDataTagHandler($tag)
    {
        $this->clearHandler($tag, "data_tag_handler");
    }

    /**
     * Clears the end tag handler of a known tag.
     *
     * @param String $tag Upper-case source tag name.
     */
    public function removeEndTagHandler($tag)
    {
        $this->clearHandler($tag, "end_tag_handler");
    }

    /**
     * Converts Flash markup to HTML.
     *
     * @param String $data Flash markup (UTF-8).
     * @param Array $properties Property name => value pairs applied for this
     *                          call only, e.g. array('plainText' => TRUE).
     * @return String
     */
    public function html($data, $properties = array())
    {
        // Non-ASCII characters are swapped for an ASCII placeholder (via their
        // JSON \uXXXX escape) so that they pass through the parser untouched;
        // the placeholders become numeric entities once parsing is done.
        $uniqid = uniqid("unicode_");
        $data = json_encode($data);
        $data = preg_replace('/\\\u([0-9a-z]{4})/', "$uniqid\$1", $data);
        $data = json_decode($data);

        $original_properties = array();
        foreach ($properties as $key => $value) {
            $original_properties[$key] = $this->$key;
            $this->$key = $value;
        }

        $data = $this->prepare($data);

        // Start from a clean state even if a previous parse stopped half-way.
        $this->nodes = array();
        $this->content = NULL;

        // Handlers are passed as callables instead of relying on
        // xml_set_object(), which is deprecated as of PHP 8.4.
        $this->parser = xml_parser_create();
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, TRUE);
        xml_set_element_handler(
            $this->parser,
            array($this, "startTagHandler"),
            array($this, "endTagHandler")
        );
        xml_set_character_data_handler($this->parser, array($this, "dataHandler"));
        xml_parse($this->parser, $data, TRUE);
        if (PHP_VERSION_ID < 80000) {
            // Only needed while parsers are resources; a no-op on PHP 8.
            xml_parser_free($this->parser);
        }
        $this->parser = NULL;

        $ret = (string) $this->content;
        $this->content = NULL;
        $this->nodes = array();

        // Characters decoded by the parser (e.g. from &#8211;) are UTF-8.
        $ret = $this->utf8ToLatin1($ret);

        if ($this->protectEmail) {
            $ret = preg_replace_callback("/mailto:(.*)\"/U", array($this, "protectEmailCallback"), $ret);
        }

        // restore original properties
        foreach ($original_properties as $key => $value) {
            $this->$key = $value;
        }

        $ret = preg_replace("/$uniqid([0-9a-z]{4})/", '&#x$1;', $ret);

        // Tidy up: trailing line break, empty spans, underline inside links,
        // and list items, which Flash emits without an enclosing list.
        $ret = preg_replace(array("/<br \/>\n$/", "/<br \/>\n<\/div>$/"), array('', "</div>"), $ret);
        $ret = preg_replace("/<span[^>]*><\/span>/U", '', $ret);
        $ret = preg_replace("/<a([^>]*)><span style=\"text-decoration: underline; \">(.*)<\/span><\/a>/U", "<a\$1>\$2</a>", $ret);
        $ret = preg_replace("/<li>(.*)<\/li>/", "<ul><li>$1</li>\n</ul>\n", $ret);
        $ret = preg_replace("/<li>/", "\n\t<li>", $ret);
        $ret = preg_replace("/<br \/>\n<ul>/", "<ul>", $ret);
        $ret = preg_replace("/<\/ul>\n<br \/>/", "</ul>", $ret);

        return $ret;
    }

    /**
     * Makes the Flash markup parseable as XML and wraps it in a root element.
     */
    private function prepare($data)
    {
        if (!$this->plainText) {
            $data = preg_replace_callback("/HREF=\"(.*)\"/U", array($this, "fixHREFCallback"), $data);
            // Self-close IMG tags; an IMG that is already self-closed is left
            // with a single trailing slash.
            $data = preg_replace('/<IMG\b([^>]*?)\s*\/?>/', "<IMG$1 />", $data);
        }
        return "<root>$data</root>";
    }

    /**
     * Builds the settings record stored for each known tag.
     */
    private function createEntity($tag, $attributes, $closed)
    {
        return array(
            "tag" => $tag,
            "attributes" => $attributes,
            "closed" => $closed,
            "start_tag_handler" => NULL,
            "data_tag_handler" => NULL,
            "end_tag_handler" => NULL,
        );
    }

    /**
     * Resets one handler slot, leaving unknown tags unregistered.
     */
    private function clearHandler($tag, $slot)
    {
        if (array_key_exists($tag, $this->entities)) {
            $this->entities[$tag][$slot] = NULL;
        }
    }

    /**
     * Escapes a value for use inside a quoted HTML attribute.
     */
    private function escapeAttribute($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Converts UTF-8 to ISO-8859-1, emitting a numeric character reference
     * for every character that ISO-8859-1 cannot represent.
     */
    private function utf8ToLatin1($text)
    {
        return preg_replace_callback(
            '/[\xC2-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF4][\x80-\xBF]{3}/',
            array($this, "utf8SequenceCallback"),
            $text
        );
    }

    /**
     * Decodes one multi-byte UTF-8 sequence matched by utf8ToLatin1().
     */
    private function utf8SequenceCallback($matches)
    {
        $bytes = $matches[0];
        $length = strlen($bytes);
        // Payload bits of the lead byte for 2-, 3- and 4-byte sequences.
        $masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);
        $code = ord($bytes[0]) & $masks[$length];
        for ($i = 1; $i < $length; $i++) {
            $code = ($code << 6) | (ord($bytes[$i]) & 0x3F);
        }
        if ($code <= 0xFF) {
            return chr($code);
        }
        return "&#x" . strtoupper(dechex($code)) . ";";
    }

    /**
     * XML start element callback: emits the opening markup of a known tag
     * and pushes its definition so the matching end tag can close it.
     */
    private function startTagHandler($parser, $tag, $attrs)
    {
        if ($this->plainText) {
            return;
        }
        if (!array_key_exists($tag, $this->entities)) {
            return;
        }

        $entity = $this->entities[$tag];

        if (isset($entity["start_tag_handler"])) {
            $str = (string) call_user_func($entity["start_tag_handler"], $parser, $tag, $attrs);
            if (preg_match("/^<(\w+)/", $str, $matches)) {
                // The handler decides which element is really opened.
                $entity["tag"] = $matches[1];
                $entity["closed"] = preg_match("/.*\/>/", $str) === 1;
            } else if ($str === '' && !isset($entity["end_tag_handler"])) {
                // Nothing was opened, so nothing must be closed either
                // (otherwise a stray closing tag would be emitted).
                $entity["closed"] = TRUE;
            }
            $this->content .= $str;
        } else {
            $this->content .= "<" . $entity["tag"];
            if (strlen((string) $entity["attributes"]) > 0) {
                $this->content .= " " . $entity["attributes"];
            }
            $this->content .= $entity["closed"] ? " />" : ">";
        }

        array_push($this->nodes, $entity);
    }

    /**
     * XML character data callback.
     *
     * The parser hands over decoded text, so it is escaped again before
     * being written to the HTML output unless a custom handler is set.
     */
    private function dataHandler($parser, $cdata)
    {
        if ($this->plainText) {
            $this->content .= $cdata;
            return;
        }

        if ($this->dataHandler != NULL) {
            $str = call_user_func($this->dataHandler, $parser, $cdata);
        } else {
            $str = htmlspecialchars($cdata, ENT_NOQUOTES, 'UTF-8');
            $str = preg_replace_callback("/\s{2,}/", array($this, "replaceSpacesCallback"), $str);
        }

        $this->content .= $str;
    }

    /**
     * XML end element callback: emits the closing markup of a known tag.
     */
    private function endTagHandler($parser, $tag)
    {
        if (!array_key_exists($tag, $this->entities)) {
            return;
        }

        if ($this->plainText) {
            if ($tag == "P") {
                $this->content .= "\n";
            }
            return;
        }

        $entity = array_pop($this->nodes);
        if ($entity["closed"]) {
            return;
        }

        if (isset($entity["end_tag_handler"])) {
            $this->content .= call_user_func($entity["end_tag_handler"], $parser, $tag);
        } else {
            $this->content .= "</" . $entity["tag"] . ">";
        }
    }

    /** Paragraphs emit no opening markup. */
    private function p_startTagHandler($parser, $tag, $attrs)
    {
        return NULL;
    }

    /** Paragraphs end with a line break. */
    private function p_endTagHandler($parser, $tag)
    {
        return "<br />\n";
    }

    /** A => <a href="..."> with an optional, non-empty target. */
    private function a_startTagHandler($parser, $tag, $attrs)
    {
        $href = array_key_exists("HREF", $attrs) ? $attrs["HREF"] : '';
        $ret = '<a href="' . $this->escapeAttribute($href) . '"';
        if (array_key_exists("TARGET", $attrs) && strlen($attrs["TARGET"]) > 0) {
            $ret .= ' target="' . $this->escapeAttribute($attrs["TARGET"]) . '"';
        }
        return $ret . '>';
    }

    /** FONT => <span> with the font attributes as inline CSS. */
    private function font_startTagHandler($parser, $tag, $attrs)
    {
        $declarations = array(
            "FACE" => "font-family: '%s'; ",
            "SIZE" => "font-size: %spx; ",
            "COLOR" => "color: %s; ",
            "LEADING" => "line-height: %spx; ",
            "LETTERSPACING" => "letter-spacing: %spx; ",
        );

        $ret = '<span style="';
        foreach ($declarations as $attribute => $format) {
            if (array_key_exists($attribute, $attrs)) {
                $ret .= sprintf($format, $this->escapeAttribute($attrs[$attribute]));
            }
        }
        return $ret . '">';
    }

    /** IMG => floated <img />, floated left unless ALIGN is "right". */
    private function img_startTagHandler($parser, $tag, $attrs)
    {
        $align_right = array_key_exists("ALIGN", $attrs) && $attrs["ALIGN"] == "right";
        $style = $align_right
            ? "float: right; margin-left: 10px; "
            : "float: left; margin-right: 10px; ";
        $src = array_key_exists("SRC", $attrs) ? $this->escapeAttribute($attrs["SRC"]) : '';
        return "<img style=\"$style\" src=\"$src\" alt=\"\" />";
    }

    /**
     * TEXTFORMAT => <div> carrying BLOCKINDENT / LEADING as margins, or
     * nothing at all when neither attribute is present.
     */
    private function textformat_startTagHandler($parser, $tag, $attrs)
    {
        $has_indent = array_key_exists("BLOCKINDENT", $attrs);
        $has_leading = array_key_exists("LEADING", $attrs);
        if (!$has_indent && !$has_leading) {
            return NULL;
        }

        $ret = '<div style="';
        if ($has_indent) {
            $ret .= "margin-left: " . $this->escapeAttribute($attrs["BLOCKINDENT"]) . "px; ";
        }
        if ($has_leading) {
            $ret .= "margin-bottom: " . $this->escapeAttribute($attrs["LEADING"]) . "px; ";
        }
        return $ret . '">';
    }

    /** U => underlined <span>. */
    private function u_startTagHandler($parser, $tag, $attrs)
    {
        return '<span style="text-decoration: underline; ">';
    }

    /**
     * Keeps a run of whitespace visible in HTML, one non-breaking space per
     * whitespace character.
     *
     * NOTE(review): the replacement used to be a plain space, which browsers
     * collapse; presumably the entity was lost when the code was published.
     */
    private function replaceSpacesCallback($matches)
    {
        return str_repeat("&nbsp;", strlen($matches[0]));
    }

    /**
     * Rewrites a mailto: address as hexadecimal character references.
     */
    private function protectEmailCallback($matches)
    {
        // The address comes from an escaped attribute value; decode it first
        // so that "&amp;" is not encoded character by character.
        $str = htmlspecialchars_decode($matches[1], ENT_QUOTES);
        $len = strlen($str);
        $ret = '';
        for ($i = 0; $i < $len; $i++) {
            $ret .= "&#x" . strtoupper(dechex(ord($str[$i]))) . ";";
        }
        return "mailto:$ret\"";
    }

    /**
     * Entity-encodes an HREF value so that a raw "&" cannot break XML parsing.
     */
    private function fixHREFCallback($matches)
    {
        return 'HREF="' . htmlentities($matches[1]) . '"';
    }
}
?>
Code:
<?php
// Demo page: converts a sample of Flash-generated HTML and prints the result
// inside a minimal XHTML document.
require_once "classes/flash2html.class.php";

// HTML Flash (sample conversion input)
$flashMarkup = '<P ALIGN="LEFT"><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0"><B>Integer sed lacus libero</B>, tempus sodales nisi. In hac habitasse platea dictumst. Pellentesque nec odio est, a iaculis lacus. Maecenas ante ligula, pellentesque in mattis sed, scelerisque nec dolor. Duis lorem enim, pretium vel luctus ut, <I>varius ut nunc</I>.</FONT></P><P ALIGN="LEFT"><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0"></FONT></P><LI><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0"><A HREF="http://www.php.net" TARGET=""><U>Vestibulum</U></A> sed risus et nunc consequat faucibus.</FONT></LI><LI><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Sed quam metus, consequat nec dignissim quis, mattis ac quam.</FONT></LI><LI><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0"><A HREF="mailto:qkehkrqnqweh@wkjehkrw.com" TARGET=""><U>Maecenas</U></A> venenatis volutpat purus, vitae sagittis enim cursus eget.</FONT></LI><LI><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Proin elementum hendrerit nibh sit amet elementum.</FONT></LI><P ALIGN="LEFT"><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0"></FONT></P><P ALIGN="LEFT"><FONT FACE="Verdana" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0"><U>Nullam tempus molestie sem</U>, non venenatis ligula suscipit a. Suspendisse dignissim, nulla quis euismod iaculis, libero lectus aliquet dui, sit amet varius ante nulla eget eros. Nulla facilisi. 
Cras et erat dui.</FONT></P>';

// One converter with the default tag handlers.
$converter = new Flash2HTML();

// The template below is kept on a single line so the emitted bytes are unchanged.
?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /> <title>Test</title> </head> <body> <?php echo $converter->html($flashMarkup); ?> </body> </html>