Return to Snippet

Revision: 31914
at September 15, 2010 20:07 by dom111


Initial Code
<?php
/* class.log.php */

// -------------------------
// log
// 
// main log class for parsing text logs and processing the data
// must be used with a log processor descendant
// 
// -------------------------
class log {
  // $patterns
  //
  // registry of known log formats; the key (eg. apacheDefault) is the value
  // passed to parse() as $type
  //
  // add further entries here to support other formats; each entry needs a
  // 'pattern' and a 'matches' array as described below
  //
  public static $patterns = array(
    'apacheDefault' => array(
      // pattern
      //
      // a perl compatible regular expression that splits one log line into
      // its parts, in this order:
      // ip address, identd, auth, day, month, year, time + TZ, request,
      // http version, response code, size, referrer, navigator
      //
      // NOTE(review): Apache writes "-" as the size when no body was sent;
      // such lines do not match (\d+) and are skipped by parse()
      //
      'pattern' => '/(\d+\.\d+\.\d+\.\d+) ([^\s]+) ([^\s]+) \[(\d+)\/(\w+)\/(\d+):(\d{1,2}:\d{1,2}:\d{1,2} ?[\+\-]?\d*)\] "(.*) (HTTP\/\d\.\d)" (\d+) (\d+) "([^"]*)" "([^"]*)"/',

      // matches
      //
      // maps the preg_match $matches index of each capture group in the
      // pattern above to the key used in the $data array given to the parser
      //
      // if using log.mysql, an array must be passed to the parser constructor
      // containing key => value pairs relating these keys to mysql fields
      //
      'matches' => array(
        1 => 'ip',
        2 => 'identd',
        3 => 'auth',
        4 => 'day',
        5 => 'month',
        6 => 'year',
        7 => 'time',
        8 => 'request',
        9 => 'http_version',
        10 => 'response_code',
        11 => 'size',
        12 => 'referrer',
        13 => 'navigator'
      )
    )
  );

  // parser
  //
  // the processor object that receives every parsed line through its
  // process($data) method; must be set before parse() is called
  //
  public static $parser = null;

  // parse
  //
  // reads $filename line by line, matches each line against the pattern
  // registered under $type and hands every matching line to the parser
  // lines that do not match the pattern are skipped
  //
  // declared static because it only works on static state and is invoked as
  // log::parse(); calling it on an instance keeps working as well
  //
  // $filename is the filename of the log you wish to parse, and $type is the
  // key in the $patterns array specified above
  //
  // returns true when the whole file has been read
  // throws Exception when the type is unknown, the file is missing or cannot
  // be read/opened, or no parser has been set; exceptions thrown by the
  // parser are passed on to the caller
  //
  public static function parse($filename = '', $type = 'apacheDefault') {
    // check all the settings are correct
    if (!isset(self::$patterns[$type])) {
      throw new Exception('Requested type not available ('.$type.')');
    }

    if (!file_exists($filename)) {
      throw new Exception('File does not exist ('.$filename.')');
    }

    if (!is_readable($filename)) {
      throw new Exception('File is not readable ('.$filename.')');
    }

    if (empty(self::$parser)) {
      throw new Exception('No parser specified (Set: log::$parser = new parser_type();)');
    }
    // end check settings

    // open the file; this can still fail after the checks above
    $handle = fopen($filename, 'r');

    if ($handle === false) {
      throw new Exception('File is not readable ('.$filename.')');
    }

    // look these up once instead of on every line
    $pattern = self::$patterns[$type]['pattern'];
    $groups = self::$patterns[$type]['matches'];

    try {
      // fgets() returns false at the end of the file (or on a read error),
      // so a false value never reaches preg_match()
      while (($line = fgets($handle)) !== false) {
        if (!preg_match($pattern, $line, $matches)) {
          continue;
        }

        // translate the numbered capture groups into named fields
        $data = array();

        foreach ($groups as $i => $key) {
          // a trailing optional group that did not take part in the match
          // is absent from $matches
          $data[$key] = isset($matches[$i]) ? $matches[$i] : '';
        }

        // hand the line over to the parser
        self::$parser->process($data);
      }
    } catch (Exception $e) {
      // do not leak the file handle when the parser fails
      fclose($handle);
      throw $e;
    }

    // close the file
    fclose($handle);

    return true;
  }
}

/* class.log.processor.php */

// -------------------------
// log_processor interface
// 
// implement this interface in any processors you write
// 
// currently the only required function is process, which processes the
// array of log data built from the preg_match results
// 
// -------------------------
interface log_processor
{
  // process
  //
  // called by the log class once for every log line that matched the pattern
  //
  // $data is the array of named fields extracted from that line
  //
  public function process($data);
}

/* class.log.output.php */

// include the interface class
// log_processor is an interface and class_exists() reports false for
// interfaces, so interface_exists() is the check that can actually detect it
if (!interface_exists('log_processor')) {
  require_once('class.log.processor.php');
}

// -------------------------
// log_output
// 
// very basic class to output data in a very simple format
// 
// -------------------------
class log_output implements log_processor {
  // process
  //
  // the function called by the log class
  //
  // prints one <p> per log line, holding one <span> per field whose class is
  // the field name and whose content is the field value
  //
  // $data is the array of field name => value pairs for one log line
  //
  // returns the html that was printed
  //
  final function process($data) {
    // open a <p> tag
    $r = '<p>';

    // loop through each field of the data
    foreach ($data as $key => $value) {
      // values such as the referrer and navigator come straight from the
      // client, so everything is escaped before it is placed in the markup
      $class = $this->escape($key);
      $text = $this->escape($value);

      // build a <span> with a class of $key containing $value
      $r .= "<span class=\"{$class}\">{$text}</span> ";
    }

    // close the </p>
    $r .= '</p>';

    // output the html
    print $r;

    // return it too, just in case
    return $r;
  }

  // escape
  //
  // makes $text safe for use in html content and in quoted attributes
  //
  private function escape($text) {
    $flags = ENT_QUOTES;

    // where available, replace invalid byte sequences instead of letting
    // htmlspecialchars() return an empty string for the whole value
    if (defined('ENT_SUBSTITUTE')) {
      $flags |= ENT_SUBSTITUTE;
    }

    return htmlspecialchars((string) $text, $flags, 'UTF-8');
  }
}


/* class.log.mysql.php */

// include the interface class
// log_processor is an interface and class_exists() reports false for
// interfaces, so interface_exists() is the check that can actually detect it
if (!interface_exists('log_processor')) {
  require_once('class.log.processor.php');
}

// -------------------------
// log_mysql
// 
// mysql class for storing parsed log data in a table
// in my tests, this script processes ~1500-4000 rows/second
// 
// -------------------------
// Example table structure for apache:
// 
// CREATE TABLE `log` (
//   `id` int(11) NOT NULL auto_increment,
//   `ip` varchar(255) NOT NULL default '',
//   `identd` varchar(255) NOT NULL default '',
//   `auth` varchar(255) NOT NULL default '',
//   `day` int(8) NOT NULL default '0',
//   `month` varchar(255) NOT NULL default '',
//   `year` int(8) NOT NULL default '0',
//   `time` varchar(255) NOT NULL default '',
//   `request` text NOT NULL,
//   `http_version` varchar(255) NOT NULL default '',
//   `response_code` int(8) NOT NULL default '0',
//   `size` int(11) NOT NULL default '0',
//   `referrer` text NOT NULL,
//   `navigator` text NOT NULL,
//   PRIMARY KEY  (`id`)
// ) ENGINE=MyISAM DEFAULT CHARSET=latin1
// 
// and the example $fields array to complement it:
// 
// $fields => array(
// // key in $data       field name in table
//   'ip'            => 'ip',
//   'identd'        => 'identd',
//   'auth'          => 'auth',
//   'day'           => 'day',
//   'month'         => 'month',
//   'year'          => 'year',
//   'time'          => 'time',
//   'request'       => 'request',
//   'http_version'  => 'http_version',
//   'response_code' => 'response_code',
//   'size'          => 'size',
//   'referrer'      => 'referrer',
//   'navigator'     => 'navigator'
// );
// 
// -------------------------
class log_mysql implements log_processor {
  // connection settings, each can be overridden through the $settings array
  // given to the constructor
  public $host = 'localhost';
  public $user = 'root';
  public $pass = '';
  public $db = '';
  public $table = '';
  // end connection settings

  // fields
  //
  // this should be an array of key => value to 'translate' the data array
  // keys to mysql fields
  //
  public $fields = array();

  // the mysqli connection, false until connect() has succeeded
  public $connection = false;

  // counter for rows processed
  public $rows = 0;

  // __construct
  //
  // executed when instantiated
  //
  // $settings is an array that contains the database settings
  // host, user, pass, db and table are all strings relating the mysql database
  // fields should be an array of key => value pairs that are $data['key'] => mysql table field
  //
  // throws Exception when $settings is not an array, when fields or table
  // are missing, or when the connection cannot be set up
  //
  final function __construct($settings = array()) {
    // process $settings
    if (!is_array($settings)) {
      throw new Exception('log_mysql $settings should be an array');
    }

    // copy over every setting that was supplied, the rest keep their defaults
    foreach (array('user', 'pass', 'host', 'db', 'table', 'fields') as $option) {
      if (isset($settings[$option])) {
        $this->$option = $settings[$option];
      }
    }

    // insert() iterates over the fields, so anything but an array is unusable
    if (empty($this->fields) || !is_array($this->fields)) {
      throw new Exception('Missing field data ($this->fields)');
    }

    if (empty($this->table)) {
      throw new Exception('Missing MySQL table name');
    }
    // end process $settings

    // connect to the database
    $this->connect();
  }

  // process
  //
  // the function called by the log class
  //
  // $data is the array of data from the parsed log
  //
  // returns true when the row was stored, throws Exception when it was not
  //
  final function process($data) {
    if (!$this->insert($data)) {
      throw new Exception('Error inserting data to MySQL server');
    }

    // keep count of the rows stored so far
    $this->rows++;

    return true;
  }

  // connect
  //
  // connect to the mysql server and select the database
  //
  // uses mysqli because the original mysql_* functions were removed in PHP 7
  // mysqli reports failures either by returning false or, depending on the
  // mysqli_report() mode, by throwing mysqli_sql_exception; both cases end
  // up as the same Exception here
  //
  private function connect() {
    try {
      $this->connection = mysqli_connect($this->host, $this->user, $this->pass);
    } catch (mysqli_sql_exception $e) {
      $this->connection = false;
    }

    if (!$this->connection) {
      throw new Exception('Unable to connect to MySQL server');
    }

    try {
      $selected = mysqli_select_db($this->connection, $this->db);
    } catch (mysqli_sql_exception $e) {
      $selected = false;
    }

    if (!$selected) {
      throw new Exception('Unable to select database ('.$this->db.')');
    }

    return true;
  }

  // insert
  //
  // inserts the data to the mysql table
  //
  // $data is the array passed from process
  //
  // returns true when the row was inserted, false otherwise
  //
  private function insert($data) {
    // add each set to an array, for easy string concatenation
    $sets = array();

    // loop through the fields
    foreach ($this->fields as $name => $field) {
      // a field that is configured but absent from this line is stored empty
      $value = isset($data[$name]) ? $data[$name] : '';

      // escape against this object's own connection so that the connection's
      // character set is the one taken into account
      $value = mysqli_real_escape_string($this->connection, (string) $value);

      $sets[] = $this->quote_identifier($field)." = '".$value."'";
    }

    // build the query
    $q = 'INSERT INTO '.$this->quote_identifier($this->table).' SET '.implode(', ', $sets);

    // execute the query
    try {
      $result = mysqli_query($this->connection, $q);
    } catch (mysqli_sql_exception $e) {
      $result = false;
    }

    // return the result
    return $result;
  }

  // quote_identifier
  //
  // wraps a table or field name in backticks, doubling any backtick inside
  // the name so it cannot end the quoting early
  //
  private function quote_identifier($name) {
    return '`'.str_replace('`', '``', (string) $name).'`';
  }
}

/* ---- */
/* Demo */
/* ---- */

/* parse.php */

// lift the execution time limit so that a long log does not kill the script
ini_set('max_execution_time', '0');

// load the parser and both bundled processors
require_once 'class.log.php';
require_once 'class.log.mysql.php';
require_once 'class.log.output.php';

// to store the rows in MySQL rather than printing them, use the processor
// below instead (see class.log.mysql.php for an example table setup)
//
// log::$parser = new log_mysql(array(
//   'user' => 'mysql-username',
//   'pass' => 'My5q1_p455w0|2D',
//   'db' => 'php_log',
//   'table' => 'log',
//   'fields' => array(
//     'ip'            => 'ip',
//     'identd'        => 'identd',
//     'auth'          => 'auth',
//     'day'           => 'day',
//     'month'         => 'month',
//     'year'          => 'year',
//     'time'          => 'time',
//     'request'       => 'request',
//     'http_version'  => 'http_version',
//     'response_code' => 'response_code',
//     'size'          => 'size',
//     'referrer'      => 'referrer',
//     'navigator'     => 'navigator'
//   )
// ));
//
// print every parsed line as html
log::$parser = new log_output();

// run the parser over the sample log using the default apache format
log::parse('test.log');

/* test.log 

123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET / HTTP/1.1" 200 304 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET /img/logo.gif HTTP/1.1" 200 570 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET /css/css.css HTTP/1.1" 200 124 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"

Initial URL
http://www.dom111.co.uk/blog/coding/php-log-process-apache-log-files-with-php/49

Initial Description
I’ve recently had to quickly parse an Apache log file, to get basic usage statistics for a site over a month and wanted the ability to quickly process any general logs and store them in a MySQL table, so I’ve made a small package class.log.

Included in the package are:

class.log.php (the main class)
class.log.processor.php (an interface for a log processor, very basic!!)
class.log.output.php (a simple implementation of the processor; outputs <p> tags with <span>s)
class.log.mysql.php (a simple mysql importer of the processed data)
parse.php (a simple implementation, bringing all the classes together)
The class is designed to use one line at a time from the log retrieved, and the regular expression specified in class.log.php can be modified to parse different types of logs as long as the matches array is also updated.

I’ve only used this for Apache logs so far, which it handled quite well. I’m not sure I’d use this in an automated script, but I’ll leave that for you to decide.

Initial Title
php-log - Process log files with PHP

Initial Tags
mysql, php, apache, log

Initial Language
PHP