#!/usr/bin/perl -w

use Getopt::Long;

# Version number, kept as separate major/minor components and combined into
# the dotted "major.minor" string that the version() sub prints.
our ($MAJOR_VERSION, $MINOR_VERSION) = (0, 1);
our $VERSION = join('.', $MAJOR_VERSION, $MINOR_VERSION);

# Print the usage summary on standard output and end the program.
# Takes no arguments and never returns to the caller.
sub help {
  # Single-quoted heredoc: the text contains nothing to interpolate.
  my $usage = <<'EOF';
Usage: wordstat [options] [files ...]

  wordstat - wc on steroids
  
  -c, --bytes, --chars     print the byte counts
  -l, --lines              print the newline counts
  -w, --words              print the word counts
  -a, --averages           print the average word length
  -s, --syllables          print the average syllables per word
  -e, --english            use descriptive output
  -t, --tabular            use tabular output

  -h, --help               this help message
  -v, --version            version information
EOF
  print $usage;
  exit;
}

# Print the program name and version string on standard output and end the
# program.  Takes no arguments and never returns to the caller.
sub version {
  my $banner = "wordstat version $VERSION\n";
  print $banner;
  exit;
}

# Parse options
#
# Each switch has a short and a long spelling (--bytes is a further synonym
# for --chars).  All spellings of a switch are declared as Getopt::Long
# aliases that write to one $opt_* variable, so no hand-copying between
# variables is needed (the old copy for -t wrote to a misspelled variable and
# was silently lost).  The selection and output-style switches are negatable
# (e.g. --nolines).  The variables are package globals because the counting
# and reporting code further down reads them by name.
our ($opt_chars, $opt_words, $opt_lines, $opt_averages, $opt_syllables,
     $opt_english, $opt_tabular, $opt_help, $opt_version);
GetOptions (
        'c|chars|bytes!' => \$opt_chars,
        'w|words!'       => \$opt_words,
        'l|lines!'       => \$opt_lines,
        'a|averages!'    => \$opt_averages,
        's|syllables!'   => \$opt_syllables,
        'e|english!'     => \$opt_english,
        't|tabular!'     => \$opt_tabular,
        'h|help'         => \$opt_help,
        'v|version'      => \$opt_version,
) or do {
  # GetOptions has already reported the offending option on stderr; stop
  # here instead of processing a command line the user did not intend.
  print STDERR "Try 'wordstat --help' for more information.\n";
  exit 1;
};
help() if $opt_help;
version() if $opt_version;

# If no selection options are given, show everything.  The test is
# "defined", not "true": an explicit --nowords still counts as a selection.
unless (grep { defined } ($opt_chars, $opt_words, $opt_lines,
                          $opt_averages, $opt_syllables)) {
  $opt_chars = 1;
  $opt_words = 1;
  $opt_lines = 1;
  $opt_averages = 1;
  $opt_syllables = 1;
}

# Act on stdin if no files are given
push (@ARGV, '-') unless @ARGV;

# Estimate the number of syllables in a single word.
#
# Heuristic: lower-case the word, drop a silent trailing "e" that follows a
# consonant, then count the runs of consecutive vowels (y counts as a vowel).
# A word that contains letters but ends up with no vowel run (e.g. "the"
# after the silent-e rule) is counted as one syllable.
#   $word   - the word to examine (not modified)
#   returns - the estimated syllable count, a non-negative integer
sub count_syllables {
  my ($word) = @_;
  $word = lc $word;
  # Strip silent e's
  $word =~ s/([^aeiou])e$/$1/;
  # Count the vowel runs directly; counting the pieces produced by split()
  # loses trailing empty fields and so under-counts words ending in a vowel.
  my $groups = () = $word =~ /[aeiouy]+/g;
  $groups = 1 if !$groups && $word =~ /[a-z]/;
  return $groups;
}

# Read in all files
#
# For each name in @ARGV ('-' means standard input) count lines, words and
# characters and, when requested, accumulate word lengths and syllables;
# print one result row per file and add the counts to the $total_* globals
# used for the summary row.
foreach my $file (@ARGV) {
  my $input;
  if ($file eq '-') {
    $input = \*STDIN;
  }
  elsif (!open ($input, '<', $file)) {
    # Report the unreadable file and carry on with the remaining ones.
    warn "wordstat: $file: $!\n";
    next;
  }
  my ($line_count, $word_count, $char_count) = (0, 0, 0);
  my ($word_lengths, $word_syllables) = (0, 0);
  # Loop through the file.  Read from this file's own handle: the bare <>
  # operator would consume every file named in @ARGV in one go.
  while (my $line = <$input>) {
    # split ' ' skips leading whitespace, so @words holds exactly the words.
    my @words = split (' ', $line);
    $line_count ++;
    $word_count += @words;
    $char_count += length($line);
    if ($opt_averages) {
      $word_lengths += length($_) foreach @words;
    }
    if ($opt_syllables) {
      $word_syllables += count_syllables($_) foreach @words;
    }
  }
  close ($input) unless $file eq '-';
  # Add the results to the total
  $total_char_count += $char_count;
  $total_word_count += $word_count;
  $total_line_count += $line_count;
  $total_lengths += $word_lengths;
  $total_syllables += $word_syllables;
  # Print the results.  Averages are fractional, so show two decimals; a
  # file without words reports 0.00 so the columns stay aligned.
  printf(" %7d", $line_count) if $opt_lines;
  printf(" %7d", $word_count) if $opt_words;
  printf(" %7d", $char_count) if $opt_chars;
  printf(" %7.2f", $word_count ? $word_lengths / $word_count : 0)
    if $opt_averages;
  printf(" %7.2f", $word_count ? $word_syllables / $word_count : 0)
    if $opt_syllables;
  print " $file\n";
}

# Print the totals if there were multiple files.
#
# Reads the $total_* globals accumulated by the per-file loop and the $opt_*
# flags to decide which columns to print.  A total that was never set (no
# file was tallied) is treated as 0.  Averages are fractional, so they are
# shown with two decimals, and reported as 0.00 when there are no words so
# the columns stay aligned.
if (@ARGV > 1) {
  my $words = $total_word_count || 0;
  printf(" %7d", $total_line_count || 0) if $opt_lines;
  printf(" %7d", $words) if $opt_words;
  printf(" %7d", $total_char_count || 0) if $opt_chars;
  printf(" %7.2f", $words ? ($total_lengths || 0) / $words : 0)
    if $opt_averages;
  printf(" %7.2f", $words ? ($total_syllables || 0) / $words : 0)
    if $opt_syllables;
  print " total\n";
}
