/ Published in: PHP
I recently had to parse an Apache log file quickly to get basic usage statistics for a site over a month. I also wanted to be able to process any general log and store the results in a MySQL table, so I've put together a small package: class.log.
Included in the package are:
class.log.php (the main class)
class.log.processor.php (an interface for a log processor, very basic!!)
class.log.output.php (a simple implementation of the processor; outputs <p> tags containing <span>s)
class.log.mysql.php (a simple mysql importer of the processed data)
parse.php (a simple implementation, bringing all the classes together)
The class is designed to use one line at a time from the log retrieved, and the regular expression specified in class.log.php can be modified to parse different types of logs as long as the matches array is also updated.
So far I've only used this for Apache logs, which it handled quite well. I'm not sure I'd use it as part of an automated job, but I'll leave that for you to decide.
Included in the package are:
class.log.php (the main class)
class.log.processor.php (an interface for a log processor, very basic!!)
class.log.output.php (a simple implementation of the processor; outputs <p> tags containing <span>s)
class.log.mysql.php (a simple mysql importer of the processed data)
parse.php (a simple implementation, bringing all the classes together)
The class is designed to use one line at a time from the log retrieved, and the regular expression specified in class.log.php can be modified to parse different types of logs as long as the matches array is also updated.
So far I've only used this for Apache logs, which it handled quite well. I'm not sure I'd use it as part of an automated job, but I'll leave that for you to decide.
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
<?php

/* class.log.php */

// -------------------------
// log
//
// main log class for parsing text logs and processing the data
// must be used with a log processor descendent
//
// NOTE(review): this listing was rebuilt from a web extraction that had
// dropped a number of statements (guards, file handling, constructor
// signature, database calls). The restored statements follow the comments
// that survived next to them - confirm against the original package
// before relying on exact behaviour.
//
// -------------------------
class log
{
    // $patterns
    //
    // add other regular expression patterns and matches arrays here to match other log formats
    // the $patterns[$key] (eg. apacheDefault) should be passed to the parse() function as the
    // $type parameter
    //
    public static $patterns = array(
        'apacheDefault' => array(
            // pattern
            //
            // a perl compatible regular expression for separating out the data in the log line
            //
            // ip address, identd, auth, day, month, year, time + TZ, request,
            // http version, response code, size, referrer, navigator
            'pattern' => '/(\d+\.\d+\.\d+\.\d+) ([^\s]+) ([^\s]+) \[(\d+)\/(\w+)\/(\d+):(\d{1,2}:\d{1,2}:\d{1,2} ?[\+\-]?\d*)\] "(.*) (HTTP\/\d\.\d)" (\d+) (\d+) "([^"]*)" "([^"]*)"/',

            // matches
            //
            // the matches here, represent the $matches index from preg_match for the pattern
            // above
            //
            // if using log.mysql, an array must be passed to the parser constructor containing
            // key => value pairs relating these matches to mysql tables
            //
            'matches' => array(
                1 => 'ip',
                2 => 'identd',
                3 => 'auth',
                4 => 'day',
                5 => 'month',
                6 => 'year',
                7 => 'time',
                8 => 'request',
                9 => 'http_version',
                10 => 'response_code',
                11 => 'size',
                12 => 'referrer',
                13 => 'navigator'
            )
        )
    );

    // parser
    //
    // this will be set to the specified parser when required
    //
    public static $parser = null;

    // parse
    //
    // this is the function called when you have set up all the settings
    //
    // $filename is the filename of the log you wish to parse, and $type is the
    // key in the $patterns array specified above
    //
    // every line that matches the pattern is turned into an associative array
    // (keys taken from the pattern's 'matches' list) and handed to
    // log::$parser->process(); lines that do not match are skipped
    //
    // returns true once the whole file has been read
    // throws Exception when the type is unknown, the file is missing or
    // unreadable, or no parser has been set; exceptions thrown by the parser
    // are passed on to the caller
    //
    // declared static because it only touches static state and is meant to
    // be called as log::parse(); a non-static declaration makes that call a
    // fatal error on PHP 8
    //
    public static function parse($filename = '', $type = 'apacheDefault')
    {
        // check all the settings are correct
        if (!array_key_exists($type, self::$patterns)) {
            throw new Exception('Requested type not available ('.$type.')');
        }

        if (!file_exists($filename)) {
            throw new Exception('File does not exist ('.$filename.')');
        }

        if (!is_readable($filename)) {
            throw new Exception('File is not readable ('.$filename.')');
        }

        if (!(self::$parser instanceof log_processor)) {
            throw new Exception('No parser specified (Set: log::$parser = new parser_type();)');
        }
        // end check settings

        $pattern = self::$patterns[$type]['pattern'];
        $keys = self::$patterns[$type]['matches'];

        // open the file
        $fp = fopen($filename, 'r');

        if ($fp === false) {
            throw new Exception('File is not readable ('.$filename.')');
        }

        try {
            // read one line at a time until the end of the file
            while (($line = fgets($fp)) !== false) {
                // lines that do not look like the requested format are ignored
                if (!preg_match($pattern, $line, $matches)) {
                    continue;
                }

                // loop through the pattern's matches and set the data array correctly
                $data = array();

                foreach ($keys as $i => $key) {
                    $data[$key] = $matches[$i];
                }

                // parse the data
                self::$parser->process($data);
            }
        } catch (Exception $e) {
            // do not leak the handle when the processor gives up half way
            fclose($fp);

            throw $e;
        }

        // close the file
        fclose($fp);

        return true;
    }
}

/* class.log.processor.php */

// -------------------------
// log_processor interface
//
// implement this class in any processors you write
//
// currently the only required function is process, which processes the
// array of log data built by log::parse() for one matching line
//
// -------------------------
interface log_processor
{
    function process($data);
}

/* class.log.output.php */

// include the interface class
if (!interface_exists('log_processor', false)) {
    require_once('class.log.processor.php');
}

// -------------------------
// log_output
//
// very basic class to output data in a very simple format
//
// -------------------------
class log_output implements log_processor
{
    // process
    //
    // the function called by the log class
    //
    // $data is the key => value array for one log line; it is printed as a
    // <p> holding one <span class="key">value</span> per field, and the same
    // html is returned
    //
    // keys and values are html-escaped: referrers, user agents and request
    // strings come straight from the client and must not reach the page raw
    //
    final public function process($data)
    {
        $spans = '';

        // loop through each field of the data
        foreach ($data as $key => $value) {
            $class = htmlspecialchars((string) $key, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $text = htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

            // build a <span> with a class of $key containing $value
            $spans .= "<span class=\"{$class}\">{$text}</span> ";
        }

        $r = '<p>' . $spans . '</p>';

        // output the html
        print $r;

        // return it too, just in case
        return $r;
    }
}

/* class.log.mysql.php */

// include the interface class
if (!interface_exists('log_processor', false)) {
    require_once('class.log.processor.php');
}

// -------------------------
// log_mysql
//
// mysql class for storing parsed log data in a table
// in my tests, this script processes ~1500-4000 rows/second
//
// -------------------------
// Example table structure for apache:
//
// CREATE TABLE `log` (
//   `id` int(11) NOT NULL auto_increment,
//   `ip` varchar(255) NOT NULL default '',
//   `identd` varchar(255) NOT NULL default '',
//   `auth` varchar(255) NOT NULL default '',
//   `day` int(8) NOT NULL default '0',
//   `month` varchar(255) NOT NULL default '',
//   `year` int(8) NOT NULL default '0',
//   `time` varchar(255) NOT NULL default '',
//   `request` text NOT NULL,
//   `http_version` varchar(255) NOT NULL default '',
//   `response_code` int(8) NOT NULL default '0',
//   `size` int(11) NOT NULL default '0',
//   `referrer` text NOT NULL,
//   `navigator` text NOT NULL,
//   PRIMARY KEY (`id`)
// ) ENGINE=MyISAM DEFAULT CHARSET=latin1
//
// and the example $fields array to complement it:
//
// $fields => array(
//   // key in $data    field name in table
//   'ip'            => 'ip',
//   'identd'        => 'identd',
//   'auth'          => 'auth',
//   'day'           => 'day',
//   'month'         => 'month',
//   'year'          => 'year',
//   'time'          => 'time',
//   'request'       => 'request',
//   'http_version'  => 'http_version',
//   'response_code' => 'response_code',
//   'size'          => 'size',
//   'referrer'      => 'referrer',
//   'navigator'     => 'navigator'
// );
//
// -------------------------
class log_mysql implements log_processor
{
    // set up the variables
    public $host = 'localhost';
    public $user = 'root';
    public $pass = '';
    public $db = '';
    public $table = '';
    // end set up variables

    // fields
    //
    // this should be an array of key => value to 'translate' the data array
    // keys to mysql fields
    //
    public $fields = array();

    // the mysql connection data
    public $connection = false;

    // counter for rows processed
    public $rows = 0;

    // __construct
    //
    // executed when instantiated
    //
    // $settings is an array that contains the database settings
    // host, user, pass, db and table are all strings relating the mysql database
    // fields should be an array of key => value pairs that are $data['key'] => mysql table field
    //
    // throws Exception when $settings is not an array, when no fields or no
    // table name end up configured, or when the connection cannot be made
    //
    public function __construct($settings = array())
    {
        // process $settings
        if (!is_array($settings)) {
            throw new Exception('log_mysql $settings should be an array');
        }

        // only the keys that were supplied override the defaults above
        foreach (array('user', 'pass', 'host', 'db', 'table', 'fields') as $option) {
            if (array_key_exists($option, $settings)) {
                $this->$option = $settings[$option];
            }
        }

        if (!is_array($this->fields) || count($this->fields) === 0) {
            throw new Exception('Missing field data ($this->fields)');
        }

        if ($this->table === '' || $this->table === null) {
            throw new Exception('Missing MySQL table name');
        }
        // end process $settings

        // connect to the database
        $this->connect();
    }

    // process
    //
    // the function called by the log class
    //
    // $data is the array of data from the parsed log
    //
    // returns true and bumps $this->rows when the row was stored,
    // throws Exception when the insert failed
    //
    final public function process($data)
    {
        if (!$this->insert($data)) {
            throw new Exception('Error inserting data to MySQL server');
        }

        $this->rows++;

        return true;
    }

    // connect
    //
    // connect to the mysql server and select the configured database
    //
    // uses mysqli: the old mysql_* functions were removed in PHP 7
    // mysqli may report failures by returning false or by throwing (the
    // default from PHP 8.1), so both are folded into the same exceptions
    //
    private function connect()
    {
        try {
            $this->connection = mysqli_connect($this->host, $this->user, $this->pass);
        } catch (Exception $e) {
            $this->connection = false;
        }

        if (!$this->connection) {
            throw new Exception('Unable to connect to MySQL server');
        }

        try {
            $selected = mysqli_select_db($this->connection, $this->db);
        } catch (Exception $e) {
            $selected = false;
        }

        if (!$selected) {
            throw new Exception('Unable to select database ('.$this->db.')');
        }

        return true;
    }

    // insert
    //
    // inserts the data to the mysql table
    //
    // $data is the array passed from process
    //
    // returns the query result, false when the insert failed
    //
    private function insert($data)
    {
        // add each set to an array, for easy string concatenation
        $sets = array();

        foreach ($this->fields as $name => $field) {
            // a key missing from $data is stored as an empty string
            $value = array_key_exists($name, $data) ? (string) $data[$name] : '';

            // escape the data - log lines are untrusted client input
            $value = mysqli_real_escape_string($this->connection, $value);

            $sets[] = '`' . $this->quoteIdentifier($field) . "` = '{$value}'";
        }

        // build the query
        $q = 'INSERT INTO `' . $this->quoteIdentifier($this->table) . '` SET ' . implode(', ', $sets) . ';';

        // execute the query
        try {
            $result = mysqli_query($this->connection, $q);
        } catch (Exception $e) {
            // process() turns a false result into its own exception
            $result = false;
        }

        return $result;
    }

    // quoteIdentifier
    //
    // doubles any backtick so a table or field name cannot break out of its quotes
    //
    private function quoteIdentifier($identifier)
    {
        return str_replace('`', '``', (string) $identifier);
    }
}

/* ---- */
/* Demo */
/* ---- */

/* parse.php */

// make sure the script doesn't die if parsing a long log
set_time_limit(0);

// include all the classes (skipped when they are already defined, so this
// listing also works when kept in a single file)
if (!class_exists('log', false)) {
    require_once('class.log.php');
}

if (!class_exists('log_mysql', false)) {
    require_once('class.log.mysql.php');
}

if (!class_exists('log_output', false)) {
    require_once('class.log.output.php');
}

// NOTE(review): the original demo statements were lost in extraction; the
// two below are the minimal wiring implied by the rest of the listing
log::$parser = new log_output();

// see class.log.mysql.php for example table setup
//
// log::$parser = new log_mysql(array(
//   'user'   => 'mysql-username',
//   'pass'   => 'My5q1_p455w0|2D',
//   'db'     => 'php_log',
//   'table'  => 'log',
//   'fields' => array(
//     'ip'            => 'ip',
//     'identd'        => 'identd',
//     'auth'          => 'auth',
//     'day'           => 'day',
//     'month'         => 'month',
//     'year'          => 'year',
//     'time'          => 'time',
//     'request'       => 'request',
//     'http_version'  => 'http_version',
//     'response_code' => 'response_code',
//     'size'          => 'size',
//     'referrer'      => 'referrer',
//     'navigator'     => 'navigator'
//   )
// ));
//

// only run the demo when the sample log below has been saved next to the script
if (is_readable('test.log')) {
    log::parse('test.log');
}

/* test.log
123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET / HTTP/1.1" 200 304 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET /img/logo.gif HTTP/1.1" 200 570 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET /css/css.css HTTP/1.1" 200 124 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
*/
URL: http://www.dom111.co.uk/blog/coding/php-log-process-apache-log-files-with-php/49