Hallo, dies ist ein Test.
PWD: /www/data-lst1/unixsoft/unixsoft/kaempfer/.public_html
Running in File Mode
Relative path: ./../../../../../server/etc/scripts/wwwstat-2.0/splitlog.pl
Real path: /www/server/etc/scripts/wwwstat-2.0/splitlog.pl
Zurück
#!YOUR_PERL_BINARY # ========================================================================== $Version = 'splitlog-1.0'; # # Copyright (c) 1996 Regents of the University of California. # # This software has been developed by Roy Fielding <fielding@ics.uci.edu> as # part of the WebSoft project at the University of California, Irvine. # <http://www.ics.uci.edu/pub/websoft/wwwstat/> # See the file LICENSE for licensing and redistribution information. # See the file INSTALL for installation information. # See the file Changes for known problems and version information. # See the file README for more information. # See the splitlog.1 man page for options and usage information. # sub usage { die <<"EndUsage"; usage: $Pname [-f config] [-h] [-e] [-x] [-v] [-dir directory] [-anon imu] [-dns] [-nodns] [-cache filename] [-m method] [-M method] [-a IP_address] [-c code] [-d date] [-t hour] [-n URL_path] [-A IP_address] [-C code] [-D date] [-T hour] [-N URL_path] [-noescape] [--] [ logfile | + | - ]... $Version Process a sequence of httpd Common Logfile Format access_log files and split them into separate files according to requested URL or virtual host. Configuration options: -f Get configuration defaults from the given file. -- Last option (all later arguments are treated as filenames). Diagnostic Options: -h Help -- just display this message to STDERR and quit. -e Display to STDERR all invalid log entries. Process Options: -x Discard any logfile entries without a filename key. -v Use the logfile entry prefix (virtual host) for output filename. -dir Put the split logfiles in the given directory. -anon Anonymize the logfile entries: i=ident, m=machine, u=userid. -dns Use DNS to lookup unresolved IP addresses (may be slow). -nodns Do not lookup unresolved IP addresses. -cache Use the given dbm file to read/write DNS cache. Search Options (include only those log entries ...): -a Containing a hostname/IP address matching the given perl regexp. -A Not containing " " " " " " " " -c Containing a server response code matching the given perl regexp. -C Not containing " " " " " " " " -d Containing a date ("Feb 02 1996") matching the given perl regexp. -D Not containing " " " " " " " " -t Containing an hour ("00" -- "23") matching the given perl regexp. -T Not containing " " " " " " " " -n Containing a URL path matching the given perl regexp (except +.). -N Not containing " " " " " " " " -m Using an HTTP method name matching the given perl regexp. -M Not using the HTTP method " " " " "" -noescape Do not escape "." and "+" in remaining search options. Filenames (none implies "+"): - Read standard input (STDIN). + Read the default logfile $DefaultLog. ... Anything else is treated as the name of a logfile to be read. EndUsage } # ========================================================================== # The main program is really quite simple ... $Pname = $0; if ($Pname =~ s#^(.*)/##) { push(@INC, $1); } # Modify include path for bin, # current dir, and home dir unshift(@INC, '.', ($ENV{'HOME'} || $ENV{'LOGDIR'})); $StartTime = time; # Get the current date-time stamp $Updated = &wtime($StartTime,''); # Format it as local time &init_defaults; # Set the default configuration &get_configuration; # Get user configuration &get_commandline; # Get command-line options &init_DNS if $LookupDNS; # Initialize the DNS cache &init_handles; # Initialize the output file handles if ($ARGV[0]) # Check for explicit filenames { foreach $filename (@ARGV) { &process_file($filename); } } else # if none, just read the default { &process_file($DefaultLog); } &close_handles; # Close all the output filehandles &close_DNS if $LookupDNS; # Close the DNS cache exit(0); # and we are finished. # ========================================================================== # ========================================================================== # Initialize default configuration options. NOTE that all of these options # can be overridden in either the system or user configuration files and # many can be overridden on the command-line. # sub init_defaults { # Specify the command for displaying compressed files to STDOUT $Zcat = 'gunzip -c'; # specify as null string if none are available $Zhandle = '(gz|Z|z)'; # list file extensions that indicate compressed # If address in log entry is one word (a local host), append what? # Specify an appropriate '.sub.dom.ain' $AppendToLocalhost = '.no_where.com'; # Specify the maximum number of open file handles allowed. # You will get "Failed open of ..." errors if it is set too high. $MaxHandles = 50; # Specify the default location of your access log $DefaultLog = '/www/server/etc/apache/logs/access_log'; # Specify the default destination directory for the split logfiles $DestDir = ''; # current directory # Specify the filename (no .ext) for non-matching, non-split log entries $Remaining = 'OTHERS'; # Specify the amount of anonymization done to the split log entries. # May be '','i','m','u','im','iu','mi','mu','ui','um','imu' # '' = none # /i/ = ident field removed # /m/ = machine name replaced with ANON or 0 # /u/ = authentication userid field removed $Anonymize = ''; # Specify whether (1) or not (0) you want the split filenames to be # based on a prefix (assumed to be a virtual host) of each logfile # entry instead of on the requested URL path. The prefix is terminated # by the first colon ':' or space. $Vhosts = 0; # Specify whether (1) or not (0) you want to lookup unresolved # IP addresses via DNS. Note that this could be *very* slow! $LookupDNS = 1; $DNScachefile = 'dnscache'; # DBM files for persistent cache $DNSexpires = 5356800; # Cache for two months (in seconds) # The rest of these options are normally only changed on the command-line $PrintInvalids = 0; # Display invalid log entries on STDERR? $SearchAddress = ''; # Pattern to look for in hostname/IP addresses $SearchCode = ''; # Pattern to look for in Code $SearchDate = ''; # Pattern to look for in Date $SearchTime = ''; # Pattern to look for in Hour $SearchPath = ''; # Pattern to look for in URL Path $SearchMethod = ''; # Pattern to look for in Method $NotAddress = ''; # Pattern to reject entry if in IP addresses $NotCode = ''; # Pattern to reject entry if in Code $NotDate = ''; # Pattern to reject entry if in Date $NotTime = ''; # Pattern to reject entry if in Hour $NotPath = ''; # Pattern to reject entry if in URL Path $NotMethod = ''; # Pattern to reject entry if in Method $EscapeSpecials = '[+.]'; # Most users forget about regexp syntax # The default user configuration filename should only # be changed if your filesystem can't handle this name. $UconfigFile = 'splitlog.rc'; # User file for overriding defaults } # ========================================================================== # Get the user configuration # sub get_configuration { local($forced) = 0; if (defined($ARGV[0]) && ($ARGV[0] eq '-f')) { shift @ARGV; $UconfigFile = shift @ARGV; $forced = 1; } if ($UconfigFile) { eval 'require $UconfigFile;'; if ($@ && $forced) { die "Unable to read $UconfigFile: $!\n"; } else { $! = 0; undef $@; } } } # ========================================================================== # Get the command-line options. # sub get_commandline { local($_, $first, $rest, $pos); local($letteropts) = 'hexvm:M:c:C:t:T:a:A:n:N:d:D:f:'; local(@args) = split(//, $letteropts); while (defined($_ = $ARGV[0])) { if ($_ eq '--') { shift @ARGV; last; } # Last option indicator if ($_ eq '-') { last; } # STDIN file indicator if (!s/^-//) { last; } # Not an option if (/^no(.*)/) # Exclude some option { # indicated by suffix unless ($_ = $1) { # or next argument shift @ARGV; &badarg('-no requires value') unless ($_ = $ARGV[0]); } if (/^escape$/) { $EscapeSpecials = ''; } elsif (/^dns$/) { $LookupDNS = 0; } else { &badarg('-no',$_); } } elsif (/^anon(.*)/) # Anonymize logfile { # at Nth level unless ($_ = $1) { shift @ARGV; &badarg('-anon requires value') unless ($_ = $ARGV[0]); } &badarg('-anon', $_) unless (/^[imu]+$/); $Anonymize = $_; } elsif (/^dns$/) # Resolve IP addresses { $LookupDNS = 1; } elsif (/^cache(.*)/) # Change cache filename { unless ($_ = $1) { shift @ARGV; &badarg('-cache requires value') unless ($_ = $ARGV[0]); } $DNScachefile = $_; } elsif (/^dir(.*)/) # Output Directory { unless ($_ = $1) { shift @ARGV; &badarg('-dir requires value') unless ($_ = $ARGV[0]); } $DestDir = $_; } else # End of full-word option arguments { while() # Loop by each character { ($first,$rest) = /^(.)(.*)/; if (($pos = index($letteropts,$first)) < 0) { &badarg("Unknown option:",$first); } if ($args[$pos+1] eq ':') { shift(@ARGV); if ($rest eq '') { &badarg($first,"requires value") unless @ARGV; $rest = $ARGV[0]; } &set_option($first, $rest); last; } else { &set_option($first, 1); $_ = $rest; last unless $_; } } } } continue { shift @ARGV; } } # ========================================================================== # Set the single-letter command-line option given. Gee, this is fun. # sub set_option { local($opt, $value) = @_; if ($opt eq 'h') { &usage; } elsif ($opt eq 'e') { $PrintInvalids = 1; } elsif ($opt eq 'x') { $Remaining = ''; } elsif ($opt eq 'v') { $Vhosts = 1; } elsif ($opt eq 'm') { if ($SearchMethod) { $SearchMethod = "($SearchMethod|$value)"; } else { $SearchMethod = $value; } } elsif ($opt eq 'M') { if ($NotMethod) { $NotMethod = "($NotMethod|$value)"; } else { $NotMethod = $value; } } elsif ($opt eq 'c') { if ($SearchCode) { $SearchCode = "($SearchCode|$value)"; } else { $SearchCode = $value; } } elsif ($opt eq 'C') { if ($NotCode) { $NotCode = "($NotCode|$value)"; } else { $NotCode = $value; } } elsif ($opt eq 't') { if ($SearchTime) { $SearchTime = "($SearchTime|$value)"; } else { $SearchTime = $value; } } elsif ($opt eq 'T') { if ($NotTime) { $NotTime = "($NotTime|$value)"; } else { $NotTime = $value; } } elsif ($opt eq 'a') { if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; } if ($SearchAddress) { $SearchAddress = "($SearchAddress|$value)"; } else { $SearchAddress = $value; } } elsif ($opt eq 'A') { if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; } if ($NotAddress) { $NotAddress = "($NotAddress|$value)"; } else { $NotAddress = $value; } } elsif ($opt eq 'n') { if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; } if ($SearchPath) { $SearchPath = "($SearchPath|$value)"; } else { $SearchPath = $value; } } elsif ($opt eq 'N') { if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; } if ($NotPath) { $NotPath = "($NotPath|$value)"; } else { $NotPath = $value; } } elsif ($opt eq 'd') { if ($value eq 'today') { $value = substr($Updated, 8, 4) . substr($Updated, 5, 3) . substr($Updated, 12, 4); $value =~ s/ 0(\d) / $1 /; } if ($SearchDate) { $SearchDate = "($SearchDate|$value)"; } else { $SearchDate = $value; } } elsif ($opt eq 'D') { if ($value eq 'today') { $value = substr($Updated, 8, 4) . substr($Updated, 5, 3) . substr($Updated, 12, 4); $value =~ s/ 0(\d) / $1 /; } if ($NotDate) { $NotDate = "($NotDate|$value)"; } else { $NotDate = $value; } } elsif ($opt eq 'f') { die "The -f option MUST be first option after -F option (if any)\n"; } else { &badarg("Unknown option:", $opt); } } # ========================================================================== sub badarg { local($dreck) = join(' ', @_); warn "Bad command option: $dreck\n"; &usage; } # ========================================================================== # process the given filename as FILE, based on the content of its first line. # sub process_file { local($filename) = @_; if ($filename eq '+') { $filename = $DefaultLog; } if ($Zhandle && ($filename =~ /\.$Zhandle$/o)) { if (!$Zcat) { warn "No zcat decompression command has been defined\n"; return; } $filename = "$Zcat $filename |"; } if (!open(FILE,$filename)) { warn "Error opening $filename: $!\n"; return; } &process_log; close(FILE); } # ========================================================================== # Process the access_log FILE by reading each entry, validating and # categorizing the access, and then appending it to one of the split logs # sub process_log { local($host, $ident, $authuser, $timestamp, $request, $status, $bytes); local($trailer, $hour, $date, $method, $htv); local($dvalue, $path, $pathkey, $outfile, $saveline); LINE: while ($_ = <FILE>) { s/^\0+//; # This is due to a bug in perl 5.002 s/\0//g; # because this line should be sufficient $saveline = $_; # If we are supposed to be splitting by virtual host, then it # is assumed that the virtual hostname is prefixed to log entry if ($Vhosts && s/^([^: ]+)[: ]//) { $vhost = $1; } else { $vhost = undef; } # # Parse the logfile entry into its seven basic components # ($host, $ident, $authuser, $timestamp, $request, $status, $bytes, $trailer) = /^(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^"]*)\" (\S+) (\S+)(.*)/; #" Now, is this garbage or is it memorex? Note that $bytes can be 0 if (!($host && $ident && $authuser && $timestamp && $request && $status)) { if ($PrintInvalids) { print STDERR "$.:$saveline"; } next LINE; } if ($status !~ /^(-|\d\d\d)$/) # Test the response code { if ($PrintInvalids) { print STDERR "$.:$saveline"; } next LINE; } if ($SearchCode) { next LINE unless ($status =~ m#$SearchCode#o); } if ($NotCode) { next LINE unless ($status !~ m#$NotCode#o );} if ($bytes !~ /^(-|\d+)$/ ) # Test the bytes transferred { if ($PrintInvalids) { print STDERR "$.:$saveline"; } next LINE; } if (!defined($trailer)) { $trailer = ''; } # # Looks okay so far -- Now figure out when the request was made. # if ($timestamp =~ m#^([ 0-3]?\d)/([A-Za-z]+)/(\d{4}):(\d\d):\d\d:\d\d [+ -]\d{1,4}#) { $date = "$2 $1 $3"; $hour = "$4"; } else { if ($PrintInvalids) { print STDERR "$.:$saveline"; } next LINE; } if ($SearchDate) { next LINE unless ($date =~ m#$SearchDate#o); } if ($NotDate) { next LINE unless ($date !~ m#$NotDate#o); } if ($SearchTime) { next LINE unless ($hour =~ m#$SearchTime#o); } if ($NotTime) { next LINE unless ($hour !~ m#$NotTime#o); } # # Then parse the method and URL pathname from request # ($method, $path, $htv) = split(' ', $request, 3); if ($SearchMethod) { next LINE unless ($method =~ m#$SearchMethod#o); } if ($NotMethod) { next LINE unless ($method !~ m#$NotMethod#o); } if ($SearchPath) { next LINE unless ($path =~ m#$SearchPath#o); } if ($NotPath) { next LINE unless ($path !~ m#$NotPath#o); } # # Get hostname/IP address and determine domain and reversed subdomain. # $host =~ tr/A-Z/a-z/; $host =~ s/\.$//; if ($host =~ /^[^.]+$/) # Unqualified hostname { if ($AppendToLocalhost) { $host .= $AppendToLocalhost; } } elsif ($host =~ /^\d+\.\d+\.\d+\.\d+$/) # IP number { if ($LookupDNS && ($dvalue = &resolve($host))) { $host = $dvalue; if ($AppendToLocalhost && ($host =~ /^[^.]+$/)) { $host .= $AppendToLocalhost; } } } $host =~ s/\.[\d.]*in-addr\.arpa$//; # Remove any DNS garbage if ($SearchAddress) { next LINE unless ($host =~ m#$SearchAddress#o); } if ($NotAddress) { next LINE unless ($host !~ m#$NotAddress#o); } # # Anonymize those parts which might be considered private # if ($Anonymize) { ($host, $ident, $authuser) = &anonymous($host, $ident, $authuser); } # # Append the log entry to file indicated by vhost and/or path # $pathkey = &path_map($vhost, $path); next LINE unless defined($outfile = &get_handle($pathkey)); print $outfile $host, ' ', $ident, ' ', $authuser, ' [', $timestamp, '] "', $request, '" ', $status, ' ', $bytes, $trailer, "\n"; } } # ========================================================================== # Determine the output logfile name from the vhost prefix and/or URL path. # The user configuration file can intercede by defining &user_path_map(). # Returning undef means append it to the default file. # sub path_map { local($vhost, $path) = @_; if (defined &user_path_map) { local($pathkey) = &user_path_map($vhost, $path); return $pathkey if defined($pathkey); } # The rest is just a simple default if the user does not want to # define their own routine. return $vhost if $vhost; # Use prefix if we got it return undef unless defined($path); # Anything to work with? return 'proxy' if ($path =~ m#^[-+a-zA-Z]+:#); # Full-URLs to proxy.log return undef unless ($path =~ s#^/(.)#$1#); # Errors and / to default if ($path =~ m#^(~|%7e|((pub|homes?|users?)(/|$)))([^/]*)#i) { if ($5) { return $5; } # User-owned directories else { return undef; } } # Otherwise, just use the top component of remaining URL # if it doesn't look like a file local($top, $rest) = split(/\//, $path, 2); if (defined($rest) || ($top !~ /\./)) { return $top; } return undef; # All remaining get put in the default file } # ========================================================================== # Initialize the file handle arrays # sub init_handles { %OpenHandles = (); @HandlesInUse = (); $NextHandle = 'fh000'; if ($DestDir) { if ($DestDir !~ m#/$#) { $DestDir .= '/'; } unless (-d $DestDir && -w _) { die "Destination $DestDir must be a writable directory.\n"; } } } # ========================================================================== # Get the file handle corresponding to the pathkey (the filename) # sub get_handle { local($pathkey) = @_; local($handle); # First be sure we have a legal filename in pathkey if (!$pathkey) { if ($Remaining) { $pathkey = $Remaining; } else { return undef; } } elsif ($pathkey =~ /^std(in|out|err)$/i) { $pathkey .= 'X'; # To prevent accidental output } else { $pathkey =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('C',hex($1))/ge; $pathkey =~ y/A-Z/a-z/; $pathkey =~ y/-+_0-9a-z/_/cs; # To prevent disaster } # See if we already have it open and ready to write return $handle if defined($handle = $OpenHandles{$pathkey}); # See if we already have too many files opened if (($#HandlesInUse + 1) >= $MaxHandles) { local($oldkey) = shift @HandlesInUse; # close the oldest $handle = $OpenHandles{$oldkey}; delete $OpenHandles{$oldkey}; close $handle; } # Finally, try to open and remember a new handle for this pathkey $handle = ++$NextHandle; if (open($handle, ">>$DestDir$pathkey.log")) { push(@HandlesInUse, $pathkey); $OpenHandles{$pathkey} = $handle; return $handle; } else { warn "Failed open of $DestDir$pathkey.log: $!\n"; return undef; } } # ========================================================================== # Close all the open filehandles. This isn't necessary, but I like to do it. # sub close_handles { local($pathkey, $handle); while (($pathkey, $handle) = each %OpenHandles) { close $handle; } undef %OpenHandles; undef @HandlesInUse; } # ========================================================================== # Anonymize those parts which might be considered private # sub anonymous { local($host, $ident, $authuser) = @_; if ($Anonymize =~ /i/) { $ident = '-'; } if ($Anonymize =~ /u/) { $authuser = '-'; } if ($Anonymize =~ /m/) { unless ($host =~ s/\.\d+$/.0/) { $host =~ s/^[^.]+\./ANON./; } } return ($host, $ident, $authuser); } # ========================================================================== # Initialize the DNS cache and remove entries that have expired. # sub init_DNS { local($ipnum, $value, $host, $seen, @expired); if ($DNScachefile) { dbmopen(%DNScache, $DNScachefile, 0666) || die "Cannot open DBM files $DNScachefile: $!\n"; while (($ipnum, $value) = each %DNScache) { ($host, $seen) = split(/\|/, $value); if ($StartTime > ($seen + $DNSexpires)) { push(@expired, $ipnum); } } foreach $ipnum (@expired) { delete $DNScache{$ipnum}; } } else { %DNScache = (); } } # ========================================================================== # Close the DNS cache # sub close_DNS { if ($DNScachefile) { dbmclose(%DNScache); } } # ========================================================================== # Resolve an IP address to its DNS hostname (if it has one) with caching. # sub resolve { local($ip) = @_; local($ipnum, $value, $host, $seen, $aliases, $addrtype, $length, @addrs); $ipnum = pack('C4', split(/\./, $ip)); if (defined($value = $DNScache{$ipnum})) { ($host, $seen) = split(/\|/, $value); return $host; } ($host, $aliases, $addrtype, $length, @addrs) = gethostbyaddr($ipnum, 2); if (!defined($host)) { $host = ''; } # Many hosts have no DNS names $DNScache{$ipnum} = join('|', $host, time); return $host; } # =========================================================================== # This is a modified (by Roy Fielding) version of Perl 4.036's ctime.pl # library by Waldemar Kebsch <kebsch.pad@nixpbe.UUCP> and # Marion Hakanson <hakanson@cse.ogi.edu>. It is distributed under the # Artistic License (included with your Perl distribution files). # # # wtime returns a time string in the format "Wkd, Dy Mon Year HH:MM:SS Zone" # with no newline appended. # # USAGE: # # wtime(time,''); -- returns the local time with no timezone appended # As in "Wed, 15 Dec 1993 23:59:59 " # # wtime(time,'GMT'); -- returns GMT time # As in "Wed, 16 Dec 1993 07:59:59 GMT" # sub wtime { local($time, $TZ) = @_; local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst); local(@DoW) = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat'); local(@MoY) = ('Jan','Feb','Mar','Apr','May','Jun', 'Jul','Aug','Sep','Oct','Nov','Dec'); # Determine what time zone is in effect. Use local time if # TZ is anything other than 'GMT' # There's no portable way to find the system default timezone. ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = ($TZ eq 'GMT') ? gmtime($time) : localtime($time); $year += ($year < 70) ? 2000 : 1900; sprintf("%s, %02d %s %4d %02d:%02d:%02d %s", $DoW[$wday], $mday, $MoY[$mon], $year, $hour, $min, $sec, $TZ); }