Revision: 68228
Updated Code
at December 16, 2014 01:13 by nigelnquande
Updated Code
/**
 * Render the tables of an HTML document as Markdown-flavoured plain text.
 *
 * The output starts with "# <title>" followed by a blank line when the
 * document has a <title>. Every rendered table is framed by "+==========" lines
 * and its rows are separated by "+----------" lines. Header cells are written
 * as "__text__", links as "[text](href)" and linked images as
 * "[![alt](src)](href)"; every cell is followed by " | ". A data cell that
 * contains links is rendered from its links only; its other text is skipped.
 *
 * @param string      $html HTML markup to parse.
 * @param string|null $id   Optional id of the single element to render instead
 *                          of every <table> in the document. Ignored unless it
 *                          is a non-empty string. An id that matches nothing
 *                          renders no table at all.
 *
 * @return string The plain-text rendering, with runs of two or more spaces
 *                collapsed into a single space.
 *
 * @throws DOMException If the markup cannot be loaded into a DOMDocument.
 */
function html_table_to_plain($html, $id = NULL)
{
    $DOM = new DOMDocument();
    if ($DOM->loadHTML($html) === FALSE) {
        throw new DOMException('Could not Load HTML into DOMDocument');
    }

    $plain_message = '';
    $titles_list = $DOM->getElementsByTagName('title');
    if ($titles_list->length) {
        $plain_message = '# ' . $titles_list->item(0)->nodeValue . "\n\n";
    }

    if (is_string($id) && $id !== '') {
        // getElementById() yields one element (or NULL), not a node list, so it
        // has to be wrapped before it can be walked like the list of tables.
        $element = $DOM->getElementById($id);
        $tables_list = ($element === NULL) ? array() : array($element);
    } else {
        $tables_list = $DOM->getElementsByTagName('table');
    }

    foreach ($tables_list as $table) {
        $plain_message .= "+==========\n";

        // Re-index from 0 so the position of the last row can be detected.
        $rows_arr = iterator_to_array($table->getElementsByTagName('tr'), false);
        $last_row = count($rows_arr) - 1;

        foreach ($rows_arr as $r => $row) {
            $headers = $row->getElementsByTagName('th');
            if ($headers->length > 0) {
                $plain_message .= '| ';
                foreach ($headers as $th) {
                    $plain_message .= '__' . $th->nodeValue . '__ | ';
                }
            }

            foreach ($row->getElementsByTagName('td') as $cell) {
                $links = $cell->getElementsByTagName('a');
                if ($links->length > 0) {
                    foreach ($links as $l) {
                        $plain_message .= '[';
                        $imgs = $l->getElementsByTagName('img');
                        if ($imgs->length > 0) {
                            // A linked image becomes [![alt](src)](href).
                            foreach ($imgs as $img) {
                                $plain_message .= '![' . $img->getAttribute('alt') . ']('
                                    . $img->getAttribute('src') . ')';
                            }
                        } else {
                            $plain_message .= $l->nodeValue;
                        }
                        $plain_message .= '](' . $l->getAttribute('href') . ')';
                    }
                } else {
                    $plain_message .= $cell->nodeValue;
                }
                $plain_message .= ' | ';
            }

            // Separator between rows, but not after the final one.
            if ($r < $last_row) {
                $plain_message .= "\n+----------\n";
            }
        }

        $plain_message .= "\n+==========\n";
    }

    return preg_replace('|(?mi-Us)[ ]{2,}|', ' ', $plain_message);
}
Revision: 68227
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at December 16, 2014 01:11 by nigelnquande
Initial Code
/**
 * Convert HTML tables into plain text using PHP's DOM parser.
 *
 * When the document has a <title>, the result begins with "# <title>" and a
 * blank line. Each rendered table sits between "+==========" lines, with
 * "+----------" between consecutive rows. <th> cells become "__text__",
 * anchors become "[text](href)", and anchors wrapping images become
 * "[![alt](src)](href)". Each cell is terminated with " | ". For a <td> that
 * contains anchors, only the anchors are written, not the surrounding text.
 *
 * @param string      $html The HTML markup to convert.
 * @param string|null $id   Id of one element to convert instead of all <table>
 *                          elements. Only used when it is a non-empty string;
 *                          if no element carries that id, no table is written.
 *
 * @return string Plain-text output in which every run of two or more spaces
 *                has been reduced to one space.
 *
 * @throws DOMException When DOMDocument::loadHTML() reports failure.
 */
function html_table_to_plain($html, $id = NULL)
{
    $DOM = new DOMDocument();
    if ($DOM->loadHTML($html) === FALSE) {
        throw new DOMException('Could not Load HTML into DOMDocument');
    }

    $plain_message = '';
    $titles_list = $DOM->getElementsByTagName('title');
    if ($titles_list->length) {
        $plain_message = '# ' . $titles_list->item(0)->nodeValue . "\n\n";
    }

    if (is_string($id) && $id !== '') {
        // getElementById() returns a single element or NULL rather than a list;
        // wrap it so the loop below can treat both selection modes alike.
        $element = $DOM->getElementById($id);
        $tables_list = ($element === NULL) ? array() : array($element);
    } else {
        $tables_list = $DOM->getElementsByTagName('table');
    }

    foreach ($tables_list as $table) {
        $plain_message .= "+==========\n";

        // Sequential keys are needed to tell whether a row is the last one.
        $rows_arr = iterator_to_array($table->getElementsByTagName('tr'), false);
        $last_row = count($rows_arr) - 1;

        foreach ($rows_arr as $r => $row) {
            $headers = $row->getElementsByTagName('th');
            if ($headers->length > 0) {
                $plain_message .= '| ';
                foreach ($headers as $th) {
                    $plain_message .= '__' . $th->nodeValue . '__ | ';
                }
            }

            foreach ($row->getElementsByTagName('td') as $cell) {
                $links = $cell->getElementsByTagName('a');
                if ($links->length > 0) {
                    foreach ($links as $l) {
                        $plain_message .= '[';
                        $imgs = $l->getElementsByTagName('img');
                        if ($imgs->length > 0) {
                            // Images inside the anchor replace the anchor text.
                            foreach ($imgs as $img) {
                                $plain_message .= '![' . $img->getAttribute('alt') . ']('
                                    . $img->getAttribute('src') . ')';
                            }
                        } else {
                            $plain_message .= $l->nodeValue;
                        }
                        $plain_message .= '](' . $l->getAttribute('href') . ')';
                    }
                } else {
                    $plain_message .= $cell->nodeValue;
                }
                $plain_message .= ' | ';
            }

            // No row separator after the final row of the table.
            if ($r < $last_row) {
                $plain_message .= "\n+----------\n";
            }
        }

        $plain_message .= "\n+==========\n";
    }

    return preg_replace('|(?mi-Us)[ ]{2,}|', ' ', $plain_message);
}
Initial URL
Initial Description
Use PHP's DOM parser to convert a table into plain text (including links with images)
Initial Title
HTML Table to plain text
Initial Tags
php, table, html, DOM, text
Initial Language
PHP