#!/usr/bin/env perl # cloc -- Count Lines of Code {{{1 # Copyright (C) 2006-2008 Northrop Grumman Corporation # Author: Al Danial # First release August 2006 # # Includes code from: # - SLOCCount v2.26 # http://www.dwheeler.com/sloccount/ # by David Wheeler. # - Regexp::Common v2.120 # http://search.cpan.org/~abigail/Regexp-Common-2.120/lib/Regexp/Common.pm # by Damian Conway and Abigail # - Win32::Autoglob # http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm # by Sean M. Burke # - Algorithm::Diff # http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm # by Tye McQueen # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details: # http://www.gnu.org/licenses/gpl.txt # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 1}}} my $VERSION = 1.04; require 5.006; # use modules {{{1 use warnings; use strict; use Getopt::Long; use File::Basename; use File::Temp qw { tempfile tempdir }; use File::Find; use File::Path; use IO::File; # Digest::MD5 isn't in the standard distribution. Use it only if installed. my $HAVE_Digest_MD5 = 0; eval "use Digest::MD5;"; if (defined $Digest::MD5::VERSION) { $HAVE_Digest_MD5 = 1; } else { warn "Digest::MD5 not installed; will skip file uniqueness checks.\n"; } my $HAVE_Rexexp_Common; # Regexp::Common isn't in the standard distribution. It will # be installed in a temp directory if necessary. BEGIN { if (eval "use Regexp::Common;") { $HAVE_Rexexp_Common = 1; } else { $HAVE_Rexexp_Common = 0; } } my $HAVE_Algorith_Diff = 0; # Algorithm::Diff isn't in the standard distribution. It will # be installed in a temp directory if necessary. eval "use Algorithm::Diff;"; if (defined $Algorithm::Diff::VERSION) { $HAVE_Algorith_Diff = 1; } else { Install_Algorithm_Diff(); } # print "HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n"; # test_alg_diff("cloc-0.80.pl", "cloc-0.90.pl", ); die; # Uncomment next two lines when building Windows executable with perl2exe # or if running on a system that already has Regexp::Common. #use Regexp::Common; #$HAVE_Rexexp_Common = 1; #perl2exe_include "Regexp/Common/whitespace.pm" #perl2exe_include "Regexp/Common/URI.pm" #perl2exe_include "Regexp/Common/URI/fax.pm" #perl2exe_include "Regexp/Common/URI/file.pm" #perl2exe_include "Regexp/Common/URI/ftp.pm" #perl2exe_include "Regexp/Common/URI/gopher.pm" #perl2exe_include "Regexp/Common/URI/http.pm" #perl2exe_include "Regexp/Common/URI/pop.pm" #perl2exe_include "Regexp/Common/URI/prospero.pm" #perl2exe_include "Regexp/Common/URI/news.pm" #perl2exe_include "Regexp/Common/URI/tel.pm" #perl2exe_include "Regexp/Common/URI/telnet.pm" #perl2exe_include "Regexp/Common/URI/tv.pm" #perl2exe_include "Regexp/Common/URI/wais.pm" #perl2exe_include "Regexp/Common/CC.pm" #perl2exe_include "Regexp/Common/SEN.pm" #perl2exe_include "Regexp/Common/number.pm" #perl2exe_include "Regexp/Common/delimited.pm" #perl2exe_include "Regexp/Common/profanity.pm" #perl2exe_include "Regexp/Common/net.pm" #perl2exe_include "Regexp/Common/zip.pm" #perl2exe_include "Regexp/Common/comment.pm" #perl2exe_include "Regexp/Common/balanced.pm" #perl2exe_include "Regexp/Common/lingua.pm" #perl2exe_include "Regexp/Common/list.pm" #perl2exe_include "File/Glob.pm" use Text::Tabs qw { expand }; use Cwd qw { cwd }; # 1}}} # Usage information, options processing. {{{1 my $script = basename $0; my $usage = " Usage: $script [options] | Count physical lines of source code in the given files and/or recursively below the given directories. Options: --by-file Report results for every source file encountered in addition to reporting by language. --categorized= Save names of categorized files to . --counted= Save names of processed source files to . --exclude-dir=[,D2,] Exclude the given comma separated directories D1, D2, D3, et cetera, from being scanned. For example --exclude-dir=.cvs,.svn will skip all files that have /.cvs/ or /.svn/ as part of their path. --exclude-lang=[,L2,] Exclude the given comma separated languages L1, L2, L3, et cetera, from being counted. --extract-with= Use to extract binary archive files (e.g.: .tar.gz, .zip, .Z). Use the literal '>FILE<' as a stand-in for the actual file(s) to be extracted. For example, to count lines of code in the input files gcc-4.2.tar.gz perl-5.8.8.tar.gz on Unix use --extract-with='gzip -dc >FILE< | tar xf -' or, if you have GNU tar, --extract-with='tar zxf >FILE<' and on Windows use: --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\" (if WinZip is installed there). --force-lang=[,] Process all files that have a extension with the counter for language . For example, to count all .f files with the Fortran 90 counter (which expects files to end with .f90) instead of the default Fortran 77 counter, use --force-lang=\"Fortran 90\",f If is omitted, every file will be counted with the counter. This option can be specified multiple times (but that is only useful when is given each time). See also --script-lang. --found= Save names of every file found to . --ignored= Save names of ignored files and the reason they were ignored to . --no3 Suppress third-generation language output. This option can cause report summation to fail if some reports were produced with this option while others were produced without it. --print-filter-stages Print to STDOUT processed source code before and after each filter is applied. --progress-rate= Show progress update after every files are processed (default =100). Set to 0 to suppress progress output (useful when redirecting output to STDOUT). --quiet Suppress all information messages except for the final report. --report-file= Write the results to instead of STDOUT. --read-lang-def= Load from the language processing filters. (see also --write-lang-def) then use these filters instead of the built-in filters. --script-lang=, Process all files that invoke as a #! scripting language with the counter for language . For example, files that begin with #!/usr/local/bin/perl5.8.8 will be counted with the Perl counter by using --script-lang=Perl,perl5.8.8 The language name is case insensitive but the name of the script language executable, , must have the right case. This option can be specified multiple times. See also --force-lang. --sdir= Use as the scratch directory instead of letting File::Temp chose the location. Files written to this location are not removed at the end of the run (as they are with File::Temp). --show-ext[=] Print information about all known (or just the given) file extensions and exit. --show-lang[=] Print information about all known (or just the given) languages and exit. --strip-comments= For each file processed, write to the current directory a version of the file which has blank lines and comments removed. The name of each stripped file is the original file name with . appended to it. --sum-reports Input arguments are report files previously created with the --report-file option. Makes a cumulative set of results containing the sum of data from the individual report files. --unicode Check binary files to see if they contain Unicode expanded ASCII text. This causes performance to drop noticably. --write-lang-def= Writes to the language processing filters then exits. Useful as a first step to creating custom language definitions (see --read-lang-def). -v[=] Verbose switch (optional numeric value). --version Print the version of this program and exit. --csv Write the results as comma separated values. --xml Write the results in XML. --yaml Write the results in YAML. "; # Help information for options not yet implemented: # --diff Shows counts of code and comment changes between # successive pairs of inputs: # delta(FD_1, FD_2), delta(FD_2, FD_3), etc # (FD means 'file, archive, or directory') # --html Create HTML files of each input file showing # comment and code lines in different colors. $| = 1; # flush STDOUT my $start_time = time(); my ( $opt_categorized , $opt_found , @opt_force_lang , @opt_script_lang , $opt_diff , $opt_html , $opt_ignored , $opt_counted , $opt_show_ext , $opt_show_lang , $opt_progress_rate , $opt_print_filter_stages , $opt_v , $opt_version , $opt_exclude_lang , $opt_exclude_dir , $opt_read_lang_def , $opt_write_lang_def , $opt_strip_comments , $opt_quiet , $opt_report_file , $opt_sdir , $opt_sum_reports , $opt_unicode , $opt_no3 , $opt_extract_with , $opt_by_file , $opt_xml , $opt_yaml , $opt_csv , ); GetOptions( "by_file|by-file" => \$opt_by_file , "categorized=s" => \$opt_categorized , "counted=s" => \$opt_counted , "exclude_lang|exclude-lang=s" => \$opt_exclude_lang , "exclude_dir|exclude-dir=s" => \$opt_exclude_dir , "extract_with|extract-with=s" => \$opt_extract_with , "found=s" => \$opt_found , "diff" => \$opt_diff , "html" => \$opt_html , "ignored=s" => \$opt_ignored , "quiet" => \$opt_quiet , "read_lang_def|read-lang-def=s" => \$opt_read_lang_def , "show_ext|show-ext:s" => \$opt_show_ext , "show_lang|show-lang:s" => \$opt_show_lang , "progress_rate|progress-rate=i" => \$opt_progress_rate , "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages , "report_file|report-file=s" => \$opt_report_file , "script_lang|script-lang=s" => \@opt_script_lang , "sdir=s" => \$opt_sdir , "strip_comments|strip-comments=s" => \$opt_strip_comments , "sum_reports|sum-reports" => \$opt_sum_reports , "unicode" => \$opt_unicode , "no3" => \$opt_no3 , "v:i" => \$opt_v , "version" => \$opt_version , "write_lang_def|write-lang-def=s" => \$opt_write_lang_def , "xml" => \$opt_xml , "force_lang|force-lang=s" => \@opt_force_lang , "yaml" => \$opt_yaml , "csv" => \$opt_csv , ); my %Exclude_Language = (); %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang) if $opt_exclude_lang; my %Exclude_Dir = (); %Exclude_Dir = map { $_ => 1 } split(/,/, $opt_exclude_dir ) if $opt_exclude_dir ; # Options defaults: $opt_progress_rate = 100 unless defined $opt_progress_rate; $opt_v = 0 unless defined $opt_v; die $usage unless defined $opt_version or defined $opt_show_lang or defined $opt_show_ext or defined $opt_write_lang_def or scalar @ARGV >= 1; die "--diff requires at least two arguments\n" if $opt_diff and scalar @ARGV < 2; # 1}}} # Step 1: Initialize global constants. {{{1 # my $ON_WINDOWS = 0; $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT"); $ON_WINDOWS = 0 if $ENV{'SHELL'}; # make Cygwin look like Unix my $nFiles_Found = 0; # updated in make_file_list my (%Language_by_Extension, %Language_by_Script, %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename, %Language_by_File, %Scale_Factor, %Known_Binary_Archives, ); my %Error_Codes = ( 'Unable to read' => -1, 'Neither file nor directory' => -2, ); if ($opt_read_lang_def) { read_lang_def( $opt_read_lang_def , # Sample values: \%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77' \%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell' \%Language_by_File , # Language_by_File{makefile} = 'make' \%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] = # [ 'remove_matches' , '^\s*#' ] \%Not_Code_Extension , # Not_Code_Extension{jpg} = 1 \%Not_Code_Filename , # Not_Code_Filename{README} = 1 \%Scale_Factor , # Scale_Factor{Perl} = 4.0 ); } else { set_constants( # \%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77' \%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell' \%Language_by_File , # Language_by_File{makefile} = 'make' \%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] = # [ 'remove_matches' , '^\s*#' ] \%Not_Code_Extension , # Not_Code_Extension{jpg} = 1 \%Not_Code_Filename , # Not_Code_Filename{README} = 1 \%Scale_Factor , # Scale_Factor{Perl} = 4.0 \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1 ); } # Process command line provided extention-to-language mapping overrides. # Make a hash of known languages in lower case for easier matching. my %Recognized_Language_lc = (); # key = language name in lc, value = true name foreach my $language (keys %Filters_by_Language) { my $lang_lc = lc $language; $Recognized_Language_lc{$lang_lc} = $language; } my %Forced_Extension = (); # file name extensions which user wants to count my $All_One_Language = 0; # set to !0 if --force-lang's is missing foreach my $pair (@opt_force_lang) { my ($lang, $extension) = split(',', $pair); my $lang_lc = lc $lang; if (defined $extension) { $Forced_Extension{$extension} = $lang; die_unknown_lang($lang, "--force-lang") unless $Recognized_Language_lc{$lang_lc}; $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc}; } else { # the scary case--count everything as this language $All_One_Language = $Recognized_Language_lc{$lang_lc}; } } foreach my $pair (@opt_script_lang) { my ($lang, $script_name) = split(',', $pair); my $lang_lc = lc $lang; if (!defined $script_name) { die "The --script-lang option requires a comma separated pair of ". "strings.\n"; } die_unknown_lang($lang, "--script-lang") unless $Recognized_Language_lc{$lang_lc}; $Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc}; } # invert %Language_by_Script hash to get an easy-to-look-up list of known # scripting languages my %Script_Language = map { $_ => 1 } values %Language_by_Script ; # 1}}} # Step 2: Early exits for display, summation. {{{1 # if ($opt_version) { printf "%.2f\n", $VERSION; exit; } print_extension_info($opt_show_ext ) if defined $opt_show_ext ; print_language_info( $opt_show_lang) if defined $opt_show_lang; exit if (defined $opt_show_ext) or (defined $opt_show_lang); # Windows doesn't expand wildcards. Use code from Sean M. Burke's # Win32::Autoglob module to do this. #print "Before glob have [", join(",", @ARGV), "]\n"; @ARGV = map {; ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_ } @ARGV if $ON_WINDOWS; ; #print "after glob have [", join(",", @ARGV), "]\n"; if ($opt_sum_reports) { my %Results = (); foreach my $type( "by language", "by report file" ) { my $found_lang = combine_results(\@ARGV, $type, \%{$Results{ $type }}, \%Filters_by_Language ); next unless %Results; my $end_time = time(); my @results = generate_report($VERSION, $end_time - $start_time, $type, \%{$Results{ $type }}, \%Scale_Factor); if ($opt_report_file) { my $ext = ".lang"; $ext = ".file" unless $type eq "by language"; next if !$found_lang and $ext eq ".lang"; write_file($opt_report_file . $ext, @results); } else { print "\n", join("\n", @results), "\n"; } } exit; } if ($opt_write_lang_def) { write_lang_def($opt_write_lang_def , \%Language_by_Extension, \%Language_by_Script , \%Language_by_File , \%Filters_by_Language , \%Not_Code_Extension , \%Not_Code_Filename , \%Scale_Factor , ); exit; } # 1}}} # Step 3: Create a list of files to consider. {{{1 # a) If inputs are binary archives, first cd to a temp # directory, expand the archive with the user-given # extraction tool, then add the temp directory to # the list of dirs to process. # b) Create a list of every file that might contain source # code. Ignore binary files, zero-sized files, and # any file in a directory the user says to exclude. # c) Determine the language for each file in the list. # my @binary_archive = (); if ($opt_extract_with) { my $cwd = cwd(); #print "cwd main = [$cwd]\n"; my @extract_location = (); foreach my $bin_file (@ARGV) { my $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit chdir $extract_dir; print "Using temp dir [$extract_dir] to extract $bin_file\n" if $opt_v; my $bin_file_full_path = ""; if (File::Spec->file_name_is_absolute( $bin_file )) { $bin_file_full_path = $bin_file; #print "bin_file_full_path (was ful) = [$bin_file_full_path]\n"; } else { $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file ); #print "bin_file_full_path (was rel) = [$bin_file_full_path]\n"; } (my $extract_cmd = $opt_extract_with ) =~ s/>FILEFILE 1 ); # 1 = delete on exit chdir $extract_dir; print $extract_cmd, "\n"; system $extract_cmd; push @extract_location, $extract_dir; unlink $archive; # otherwise will be extracting it forever } $count_binary_archives = scalar @binary_archive; if ($count_binary_archives == $previous_count) { $binary_archives_exist = 0; } $previous_count = $count_binary_archives; } chdir $cwd; @ARGV = @extract_location; } # 1}}} my @Errors = (); my @file_list = (); # global variable updated in files() my %Ignored = (); # files that are not counted (language not recognized or # problems reading the file) if ($opt_diff) { # Step 4: Separate code from non-code files. {{{1 my @fh; my @files_for_set = (); # make file lists for each separate argument for (my $i = 0; $i < scalar @ARGV; $i++) { push @fh, make_file_list([ $ARGV[$i] ], \%Error_Codes, \@Errors, \%Ignored); @{$files_for_set[$i]} = @file_list; @file_list = (); } # 1}}} # Step 5: Remove duplicate files. {{{1 # my %Language = (); my %unique_source_file = (); my $n_set = 0; foreach my $FH (@fh) { # loop over each pair of file sets ++$n_set; remove_duplicate_files($FH, \%{$Language{$FH}} , \%{$unique_source_file{$FH}} , \%Error_Codes , \@Errors , \%Ignored ); printf "%2d: %8d unique file%s. \n", $n_set, plural_form(scalar keys %unique_source_file) unless $opt_quiet; } # 1}}} # Step 6: Count code, comments, blank lines. {{{1 # my %n_Files_Same = (); my %n_Files_Added = (); my %n_Files_Removed = (); my %n_Files_Changed = (); my %Results_by_Language = (); my %Results_by_File = (); foreach (my $F = 0; $F < scalar @fh - 1; $F++) { # loop over file sets; do diff between set $F to $F+1 my $nCounted = 0; my @changed_file_pairs = (); align_by_pairs(\%{$unique_source_file{$fh[$F ]}}, # in \%{$unique_source_file{$fh[$F+1]}}, # in \$n_Files_Same{$fh[$F]} , # out \$n_Files_Added{$fh[$F]} , # out \$n_Files_Removed{$fh[$F]} , # out \$n_Files_Changed{$fh[$F]} , # out \@changed_file_pairs , # out ); foreach my $pair (@changed_file_pairs) { my $file_L = $pair->[0]; my $file_R = $pair->[1]; #print "main step 6 file_L=$file_L file_R=$file_R\n"; ++$nCounted; printf "Counting: %d\r", $nCounted unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate)); next if $Ignored{$file_L}; if ($Exclude_Language{$Language{$fh[$F ]}{$file_L}} or $Exclude_Language{$Language{$fh[$F+1]}{$file_R}} ) { $Ignored{$file_L} = "--exclude_lang=$Language{$fh[$F ]}{$file_L}"; $Ignored{$file_R} = "--exclude_lang=$Language{$fh[$F+1]}{$file_R}"; next; } if (!defined @{$Filters_by_Language{$Language{$fh[$F ]}{$file_L}} } or !defined @{$Filters_by_Language{$Language{$fh[$F+1]}{$file_R}} } ) { if (($Language{$fh[$F ]}{$file_L} eq "(unknown)") or ($Language{$fh[$F+1]}{$file_R} eq "(unknown)") ) { $Ignored{$fh[$F ]}{$file_L} = "language unknown (#1)"; $Ignored{$fh[$F+1]}{$file_R} = "language unknown (#1)"; } else { $Ignored{$fh[$F ]}{$file_L} = "missing Filters_by_Language{$Language{$file_L}}"; $Ignored{$fh[$F+1]}{$file_R} = "missing Filters_by_Language{$Language{$file_R}}"; } next; } print "DIFF($file_L, $file_R)\n"; # step 1: identify comments in both files chomp ( my @lines_L = read_file($file_L) ); my @original_minus_blanks_L = rm_blanks( \@lines_L, $Language{$fh[$F]}{$file_L}); @lines_L = @original_minus_blanks_L; @lines_L = rm_comments(\@lines_L, $Language{$fh[$F]}{$file_L}, $file_L); chomp ( my @lines_R = read_file($file_R) ); my @original_minus_blanks_R = rm_blanks( \@lines_R, $Language{$fh[$F+1]}{$file_R}); @lines_R = @original_minus_blanks_R; @lines_R = rm_comments(\@lines_R, $Language{$fh[$F+1]}{$file_R}, $file_R); my (@diff_LL, @diff_LR, %count_L); file_diff(\@original_minus_blanks_L , # in \@lines_L , # in "comment" , # in \@diff_LL, \@diff_LR, \%count_L); # out my (@diff_RL, @diff_RR, %count_R); file_diff(\@original_minus_blanks_R , # in \@lines_R , # in "comment" , # in \@diff_RL, \@diff_RR, \%count_R); # out #use Data::Dumper; #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, "count", \%count_L); #print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, "count", \%count_R); #v# my ($all_line_count, #v# $blank_count , #v# $comment_count , #v# ) = call_counter($file_L, $Language{$file_L}); #v# my $code_count = $all_line_count - $blank_count - $comment_count; #v# if ($opt_by_file) { #v# $Results_by_File{$file_L}{'code' } = $code_count ; #v# $Results_by_File{$file_L}{'blank' } = $blank_count ; #v# $Results_by_File{$file_L}{'comment'} = $comment_count ; #v# $Results_by_File{$file_L}{'lang' } = $Language{$file_L}; #v# $Results_by_File{$file_L}{'nFiles' } = 1; #v# } #v# #v# $Results_by_Language{$Language{$file_L}}{'nFiles'}++; #v# $Results_by_Language{$Language{$file_L}}{'code'} += $code_count ; #v# $Results_by_Language{$Language{$file_L}}{'blank'} += $blank_count ; #v# $Results_by_Language{$Language{$file_L}}{'comment'} += $comment_count; } #v# my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored; #v# write_file($opt_ignored, @ignored_reasons ) if $opt_ignored; #v# write_file($opt_counted, sort keys %Language) if $opt_counted; } # 1}}} # Step 7: Print results. {{{1 # my $end_time = time(); printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored) unless $opt_quiet; print_errors(\%Error_Codes, \@Errors) if @Errors; exit unless %Results_by_Language; #use YAML; print YAML::Dump(\%Results_by_Language); die; my @results = (); unless ($opt_by_file) { @results = generate_report( $VERSION, $end_time - $start_time, "by language", \%Results_by_Language, \%Scale_Factor); if ($opt_report_file) { write_file($opt_report_file, @results); } else { print "\n", join("\n", @results), "\n"; } } else { @results = generate_report( $VERSION, $end_time - $start_time, "by file", \%Results_by_File, \%Scale_Factor); if ($opt_report_file) { write_file($opt_report_file, @results); } else { print "\n", join("\n", @results), "\n"; } } # 1}}} } else { # Step 4: Separate code from non-code files. {{{1 my $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored); # make_file_list populates global variable @file_list via call to # File::Find's find() which in turn calls files() # 1}}} # Step 5: Remove duplicate files. {{{1 # my %Language = (); my %unique_source_file = (); remove_duplicate_files($fh, \%Language , \%unique_source_file , \%Error_Codes , \@Errors , \%Ignored ); printf "%8d unique file%s. \n", plural_form(scalar keys %unique_source_file) unless $opt_quiet; # 1}}} # Step 6: Count code, comments, blank lines. {{{1 # my %Results_by_Language = (); my %Results_by_File = (); my $nCounted = 0; foreach my $file (sort keys %unique_source_file) { ++$nCounted; printf "Counting: %d\r", $nCounted unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate)); next if $Ignored{$file}; if ($Exclude_Language{$Language{$file}}) { $Ignored{$file} = "--exclude_lang=$Language{$file}"; next; } if (!defined @{$Filters_by_Language{$Language{$file}} }) { if ($Language{$file} eq "(unknown)") { $Ignored{$file} = "language unknown (#1)"; } else { $Ignored{$file} = "missing Filters_by_Language{$Language{$file}}"; } next; } my ($all_line_count, $blank_count , $comment_count , ) = call_counter($file, $Language{$file}); my $code_count = $all_line_count - $blank_count - $comment_count; if ($opt_by_file) { $Results_by_File{$file}{'code' } = $code_count ; $Results_by_File{$file}{'blank' } = $blank_count ; $Results_by_File{$file}{'comment'} = $comment_count ; $Results_by_File{$file}{'lang' } = $Language{$file}; $Results_by_File{$file}{'nFiles' } = 1; } $Results_by_Language{$Language{$file}}{'nFiles'}++; $Results_by_Language{$Language{$file}}{'code'} += $code_count ; $Results_by_Language{$Language{$file}}{'blank'} += $blank_count ; $Results_by_Language{$Language{$file}}{'comment'} += $comment_count; } my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored; write_file($opt_ignored, @ignored_reasons ) if $opt_ignored; write_file($opt_counted, sort keys %Language) if $opt_counted; # 1}}} # Step 7: Print results. {{{1 # my $end_time = time(); printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored) unless $opt_quiet; print_errors(\%Error_Codes, \@Errors) if @Errors; exit unless %Results_by_Language; #use YAML; print YAML::Dump(\%Results_by_Language); die; my @results = (); unless ($opt_by_file) { @results = generate_report( $VERSION, $end_time - $start_time, "by language", \%Results_by_Language, \%Scale_Factor); if ($opt_report_file) { write_file($opt_report_file, @results); } else { print "\n", join("\n", @results), "\n"; } } else { @results = generate_report( $VERSION, $end_time - $start_time, "by file", \%Results_by_File, \%Scale_Factor); if ($opt_report_file) { write_file($opt_report_file, @results); } else { print "\n", join("\n", @results), "\n"; } } # 1}}} } sub combine_results { # {{{1 # returns 1 if the inputs are categorized by language # 0 if no identifiable language was found my ($ra_report_files, # in $report_type , # in "by language" or "by report file" $rhh_count , # out count{TYPE}{nFiles|code|blank|comment|scaled} $rhaa_Filters_by_Language , # in ) = @_; my $found_language = 0; foreach my $file (@{$ra_report_files}) { my $IN = new IO::File $file, "r"; if (!defined $IN) { warn "Unable to read $file; ignoring.\n"; next; } while (<$IN>) { next if /^(http|Language|SUM|-----)/; if (m{^(.*?)\s+ # language (\d+)\s+ # files (\d+)\s+ # blank (\d+)\s+ # comments (\d+)\s+ # code ( # next four entries missing with -nno3 x\s+ # x \d+\.\d+\s+ # scale =\s+ # = (\d+\.\d+)\s* # scaled code )? $}x) { if ($report_type eq "by language") { next unless defined %{$rhaa_Filters_by_Language->{$1}}; # above test necessary to avoid trying to sum reports # of reports (which have no language breakdown). $found_language = 1; $rhh_count->{$1 }{'nFiles' } += $2; $rhh_count->{$1 }{'blank' } += $3; $rhh_count->{$1 }{'comment'} += $4; $rhh_count->{$1 }{'code' } += $5; $rhh_count->{$1 }{'scaled' } += $7 unless $opt_no3; } else { $rhh_count->{$file}{'nFiles' } += $2; $rhh_count->{$file}{'blank' } += $3; $rhh_count->{$file}{'comment'} += $4; $rhh_count->{$file}{'code' } += $5; $rhh_count->{$file}{'scaled' } += $7 unless $opt_no3; } } } } return $found_language; } # 1}}} sub generate_report { # {{{1 # returns an array of lines containing the results my ($version , # in $elapsed_sec, # in $report_type, # in "by language" | "by report file" | "by file" $rhh_count , # in count{TYPE}{nFiles|code|blank|comment|scaled} $rh_scale , # in ) = @_; my @results = (); my $languages = (); my $sum_files = 0; my $sum_code = 0; my $sum_blank = 0; my $sum_comment = 0; foreach my $language (keys %{$rhh_count}) { $sum_files += $rhh_count->{$language}{'nFiles'} ; $sum_blank += $rhh_count->{$language}{'blank'} ; $sum_comment += $rhh_count->{$language}{'comment'}; $sum_code += $rhh_count->{$language}{'code'} ; } my $sum_lines = $sum_blank + $sum_comment + $sum_code; $elapsed_sec = 0.5 unless $elapsed_sec; my $spacing_1 = 13; my $spacing_2 = 9; my $spacing_3 = 17; if ($opt_no3) { $spacing_1 = 19; $spacing_2 = 14; $spacing_3 = 28; } my %Format = ( '1' => { 'xml' => 'name="%s" ', 'txt' => '%-23s ' , }, '2' => { 'xml' => 'name="%s" ', 'txt' => "\%-${spacing_3}s ", }, '3' => { 'xml' => 'files_count="%d" ', 'txt' => '%5d ', }, '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ', 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d", }, '5' => { 'xml' => 'factor="%.2f" scaled="%.2f" ', 'txt' => ' x %6.2f = %14.2f', }, ); my $Style = "txt"; $Style = "xml" if $opt_xml ; $Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt $Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt my $URL = "http://cloc.sourceforge.net"; my $hyphen_line = sprintf "%s", '-' x 79; my $data_line = ""; my $first_column; my $BY_LANGUAGE = 0; my $BY_FILE = 0; if ($report_type eq "by language") { $first_column = "Language"; $BY_LANGUAGE = 1; } elsif ($report_type eq "by file") { $first_column = "File"; $BY_FILE = 1; } else { $first_column = "Report File"; } my $header_line = sprintf "%s v %4.2f", $URL, $version; $header_line .= sprintf(" T=%.1f s (%.1f files/s, %.1f lines/s)", $elapsed_sec , $sum_files/$elapsed_sec, $sum_lines/$elapsed_sec) unless $opt_sum_reports; if ($opt_xml) { push @results, ""; push @results, ""; push @results, "
$header_line
"; if ($BY_FILE) { push @results, ""; } else { push @results, ""; } } elsif ($opt_yaml) { push @results, "---\n# $header_line"; } elsif ($opt_csv) { # append the header to the end of the column headers # to keep the output a bit cleaner from a spreadsheet # perspective } else { push @results, $header_line; push @results, $hyphen_line; } if ($Style eq "txt") { # column headers $data_line = sprintf "%-${spacing_1}s ", $first_column; if ($BY_FILE) { $data_line .= sprintf "%${spacing_2}s " , " " ; } else { $data_line .= sprintf "%${spacing_2}s " , "files"; } $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s", "blank" , "comment" , "code"; $data_line .= sprintf " %8s %14s", "scale" , "3rd gen. equiv" unless $opt_no3; push @results, $data_line; push @results, $hyphen_line; } if ($opt_csv) { my $header2 = " ,"; $header2 = " ,files" unless $BY_FILE; $header2 .= ",blank,comment,code"; $header2 .= ",scale,3rd gen. equiv" unless $opt_no3; $header2 .= ',"' . $header_line . '"'; push @results, $header2; } my $sum_scaled = 0; foreach my $lang_or_file (sort { $rhh_count->{$b}{'code'} <=> $rhh_count->{$a}{'code'} } keys %{$rhh_count}) { my ($factor, $scaled); if ($BY_LANGUAGE or $BY_FILE) { $factor = 1; if ($BY_LANGUAGE) { if (defined $rh_scale->{$lang_or_file}) { $factor = $rh_scale->{$lang_or_file}; } else { warn "No scale factor for $lang_or_file; using 1.00"; } } else { # by individual code file $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}}; } $scaled = $factor*$rhh_count->{$lang_or_file}{'code'}; } else { if (!defined $rhh_count->{$lang_or_file}{'scaled'}) { $opt_no3 = 1; # If we're summing together files previously generated # with --no3 then rhh_count->{$lang_or_file}{'scaled'} # this variable will be undefined. That should only # happen when summing together by file however. } elsif ($BY_LANGUAGE) { warn "Missing scaled language info for $lang_or_file\n"; } unless ($opt_no3) { $scaled = $rhh_count->{$lang_or_file}{'scaled'}; $factor = $scaled/$rhh_count->{$lang_or_file}{'code'}; } } if ($BY_FILE) { $data_line = sprintf $Format{'1'}{$Style}, $lang_or_file; } else { $data_line = sprintf $Format{'2'}{$Style}, $lang_or_file; } $data_line .= sprintf $Format{3}{$Style} , $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE; $data_line .= sprintf $Format{4}{$Style} , $rhh_count->{$lang_or_file}{'blank'} , $rhh_count->{$lang_or_file}{'comment'}, $rhh_count->{$lang_or_file}{'code'} ; $data_line .= sprintf $Format{5}{$Style} , $factor , $scaled unless $opt_no3; $sum_scaled += $scaled unless $opt_no3; if ($opt_xml) { if (defined $rhh_count->{$lang_or_file}{'lang'}) { my $lang = $rhh_count->{$lang_or_file}{'lang'}; if (!defined $languages->{$lang}) { $languages->{$lang} = $lang; } $data_line.=' language="' . $lang . '" '; } if ($BY_FILE) { push @results, ""; } else { push @results, ""; } } elsif ($opt_yaml) { push @results, $lang_or_file . ":"; push @results, " nFiles: " .$rhh_count->{$lang_or_file}{'nFiles'} ; push @results, " blank: " .$rhh_count->{$lang_or_file}{'blank'} ; push @results, " comment: ".$rhh_count->{$lang_or_file}{'comment'}; push @results, " code: " .$rhh_count->{$lang_or_file}{'code'} unless $BY_FILE; if (!$opt_no3) { push @results, " scaled: " . $scaled; push @results, " factor: " . $factor; } } elsif ($opt_csv) { my $extra_3 = ""; $extra_3 = ",$factor,$scaled" unless $opt_no3; push @results, $lang_or_file . "," . $rhh_count->{$lang_or_file}{'nFiles'} . "," . $rhh_count->{$lang_or_file}{'blank'} . "," . $rhh_count->{$lang_or_file}{'comment'}. "," . $rhh_count->{$lang_or_file}{'code'} . $extra_3; } else { push @results, $data_line; } } my $avg_scale = 1; # weighted average of scale factors $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code) if $sum_code and !$opt_no3; if ($opt_xml) { $data_line = ""; if (!$BY_FILE) { $data_line .= sprintf "sum_files=\"%d\" ", $sum_files; } $data_line .= sprintf $Format{'4'}{$Style}, $sum_blank , $sum_comment , $sum_code ; $data_line .= sprintf $Format{'5'}{$Style}, $avg_scale , $sum_scaled unless $opt_no3; push @results, ""; if ($BY_FILE) { push @results, ""; } else { foreach my $language (keys %{$languages}) { push @results, ''; } push @results, ""; } push @results, "
"; } elsif ($opt_yaml) { push @results, "SUM:"; push @results, " blank: " . $sum_blank ; push @results, " code: " . $sum_code ; push @results, " comment: ". $sum_comment; push @results, " nFiles: " . $sum_files unless $BY_FILE; if (!$opt_no3) { push @results, " scaled: " . $sum_scaled; push @results, " factor: " . $avg_scale ; } } elsif ($opt_csv) { # do nothing } else { if ($BY_FILE) { $data_line = sprintf "%-23s ", "SUM:" ; } else { $data_line = sprintf "%-${spacing_1}s ", "SUM:" ; $data_line .= sprintf "%${spacing_2}d ", $sum_files; } $data_line .= sprintf $Format{'4'}{$Style}, $sum_blank , $sum_comment , $sum_code ; $data_line .= sprintf $Format{'5'}{$Style}, $avg_scale , $sum_scaled unless $opt_no3; push @results, $hyphen_line if $sum_files > 1; push @results, $data_line if $sum_files > 1; push @results, $hyphen_line; } return @results; } # 1}}} sub print_errors { # {{{1 my ($rh_Error_Codes, # in $raa_errors , # in ) = @_; my %error_string = reverse(%{$rh_Error_Codes}); my $nErrors = scalar @{$raa_errors}; printf "\n%d error%s:\n", plural_form(scalar @Errors); for (my $i = 0; $i < $nErrors; $i++) { printf "%s: %s\n", $error_string{ $raa_errors->[$i][0] }, $raa_errors->[$i][1] ; } print "\n"; } # 1}}} sub write_lang_def { # {{{1 my ($file , $rh_Language_by_Extension , # in $rh_Language_by_Script , # in $rh_Language_by_File , # in $rhaa_Filters_by_Language , # in $rh_Not_Code_Extension , # in $rh_Not_Code_Filename , # in $rh_Scale_Factor , # in ) = @_; my $OUT = new IO::File $file, "w"; die "Unable to write to $file\n" unless defined $OUT; foreach my $language (sort keys %{$rhaa_Filters_by_Language}) { next if $language eq "MATLAB/Objective C/MUMPS"; printf $OUT "%s\n", $language; foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) { printf $OUT " filter %s", $filter->[0]; printf $OUT " %s", $filter->[1] if defined $filter->[1]; print $OUT "\n"; } foreach my $ext (sort keys %{$rh_Language_by_Extension}) { if ($language eq $rh_Language_by_Extension->{$ext}) { printf $OUT " extension %s\n", $ext; } } foreach my $filename (sort keys %{$rh_Language_by_File}) { if ($language eq $rh_Language_by_File->{$filename}) { printf $OUT " filename %s\n", $filename; } } foreach my $script_exe (sort keys %{$rh_Language_by_Script}) { if ($language eq $rh_Language_by_Script->{$script_exe}) { printf $OUT " script_exe %s\n", $script_exe; } } printf $OUT " 3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language}; } $OUT->close; } # 1}}} sub read_lang_def { # {{{1 my ($file , $rh_Language_by_Extension , # out $rh_Language_by_Script , # out $rh_Language_by_File , # out $rhaa_Filters_by_Language , # out $rh_Not_Code_Extension , # out $rh_Not_Code_Filename , # out $rh_Scale_Factor , # out ) = @_; my $IN = new IO::File $file, "r"; die "Unable to read $file.\n" unless defined $IN; my $language = ""; while (<$IN>) { next if /^\s*#/ or /^\s*$/; if (/^(\w+.*?)\s*$/) { $language = $1; next; } die "Missing computer language name, line $. of $file\n" unless $language; if (/^ filter\s+(\w+)\s*$/) { push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ] } elsif (/^ filter\s+(\w+)\s+(.*?)\s*$/) { push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ] } elsif (/^ extension\s+(\S+)\s*$/) { if (defined $rh_Language_by_Extension->{$1}) { die "File extension collision: $1 ", "maps to languages '$rh_Language_by_Extension->{$1}' ", "and '$language'\n" , "Edit $file and remove $1 from one of these two ", "language definitions.\n"; } $rh_Language_by_Extension->{$1} = $language; } elsif (/^ filename\s+(\S+)\s*$/) { $rh_Language_by_File->{$1} = $language; } elsif (/^ script_exe\s+(\S+)\s*$/) { $rh_Language_by_Script->{$1} = $language; } elsif (/^ 3rd_gen_scale\s+(\S+)\s*$/) { $rh_Scale_Factor->{$language} = $1; } else { die "Unexpected data line $. of $file:\n$_\n"; } } $IN->close; } # 1}}} sub print_extension_info { # {{{1 my ($extension,) = @_; if ($extension) { # show information on this extension foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { # Language_by_Extension{f} = 'Fortran 77' printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext} if $ext =~ m{$extension}i; } } else { # show information on all extensions foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { # Language_by_Extension{f} = 'Fortran 77' printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext}; } } } # 1}}} sub print_language_info { # {{{1 my ($language,) = @_; my %extensions = (); # the subset matched by the given $language value if ($language) { # show information on this language foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { # Language_by_Extension{f} = 'Fortran 77' push @{$extensions{$Language_by_Extension{$ext}} }, $ext if $Language_by_Extension{$ext} =~ m{$language}i; } } else { # show information on all languages foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { # Language_by_Extension{f} = 'Fortran 77' push @{$extensions{$Language_by_Extension{$ext}} }, $ext } } # add exceptions (one file extension mapping to multiple languages) if (!$language or $language =~ /^(Objective C|MATLAB|MUMPS)$/i) { push @{$extensions{'Objective C'}}, "m"; push @{$extensions{'MATLAB'}} , "m"; push @{$extensions{'MUMPS'}} , "m"; delete $extensions{'MATLAB/Objective C/MUMPS'}; } if (%extensions) { foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) { printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}}); } } } # 1}}} sub make_file_list { # {{{1 my ($ra_arg_list, # in file and/or directory names to examine $rh_Err , # in hash of error codes $raa_errors , # out errors encountered $rh_ignored , # out files not recognized as computer languages ) = @_; my ($fh, $filename); if ($opt_categorized) { $filename = $opt_categorized; $fh = new IO::File $filename, "+>"; # open for read/write die "Unable to write to $filename: $!\n" unless defined $fh; } elsif ($opt_sdir) { # write to the user-defined scratch directory $filename = $opt_sdir . '/cloc_file_list.txt'; $fh = new IO::File $filename, "+>"; # open for read/write die "Unable to write to $filename: $!\n" unless defined $fh; } else { # let File::Temp create a suitable temporary file ($fh, $filename) = tempfile(UNLINK => 1); # delete file on exit print "Using temp file list [$filename]\n" if $opt_v; } my @dir_list = (); foreach my $file_or_dir (@{$ra_arg_list}) { #print "make_file_list file_or_dir=$file_or_dir\n"; my $size_in_bytes = 0; if (!-r $file_or_dir) { push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir]; next; } if (is_file($file_or_dir)) { if (!(-s $file_or_dir)) { # 0 sized file, named pipe, socket $rh_ignored->{$file_or_dir} = 'zero sized file'; next; } elsif (-B $file_or_dir) { # avoid binary files if ($opt_unicode) { # only ignore if not a Unicode file w/trivial # ASCII transliteration if (!unicode_file($file_or_dir)) { $rh_ignored->{$file_or_dir} = 'binary file'; next; } } else { $rh_ignored->{$file_or_dir} = 'binary file'; next; } } push @file_list, "$file_or_dir"; } elsif (is_dir ($file_or_dir)) { push @dir_list, $file_or_dir; } else { push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $file_or_dir]; $rh_ignored->{$file_or_dir} = 'not file, not directory'; } } foreach my $dir (@dir_list) { #print "make_file_list dir=$dir\n"; find(\&files, $dir); # populates global variable @file_list } $nFiles_Found = scalar @file_list; printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet; write_file($opt_found, sort @file_list) if $opt_found; my $nFiles_Categorized = 0; foreach my $file (@file_list) { printf "classifying $file\n" if $opt_v > 2; my $basename = basename $file; if ($Not_Code_Filename{$basename}) { $rh_ignored->{$file} = "listed in " . '$' . "Not_Code_Filename{$basename}"; next; } elsif ($basename =~ m{~$}) { $rh_ignored->{$file} = "temporary editor file"; next; } my $size_in_bytes = (stat $file)[7]; my $language = ""; if ($All_One_Language) { # user over-rode auto-language detection by using # --force-lang with just a language name (no extension) $language = $All_One_Language; } else { $language = classify_file($file , $rh_Err , $raa_errors, $rh_ignored); } die "make_file_list($file) undef size" unless defined $size_in_bytes; die "make_file_list($file) undef lang" unless defined $language; printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file; ++$nFiles_Categorized; #printf "classified %d files\n", $nFiles_Categorized # unless (!$opt_progress_rate or # ($nFiles_Categorized % $opt_progress_rate)); } printf "classified %d files\n", $nFiles_Categorized if !$opt_quiet and $nFiles_Categorized > 1; return $fh; # handle to the file containing the list of files to process } # 1}}} sub remove_duplicate_files { # {{{1 my ($fh , # in $rh_Language , # out $rh_unique_source_file, # out $rh_Err , # in $raa_errors , # out errors encountered $rh_ignored , # out ) = @_; # Check for duplicate files by comparing file sizes. # Where files are equally sized, compare their MD5 checksums. my $n = 0; my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ] seek($fh, 0, 0); # rewind to beginning of the temp file while (<$fh>) { ++$n; my ($size_in_bytes, $language, $file) = split(/,/, $_, 3); chomp($file); $rh_Language->{$file} = $language; push @{$files_by_size{$size_in_bytes}}, $file; } if ($opt_progress_rate and ($n > $opt_progress_rate)) { printf "Duplicate file check %d files (%d known unique)\r", $n, scalar keys %files_by_size; } $n = 0; foreach my $bytes (sort {$a <=> $b} keys %files_by_size) { ++$n; printf "Unique: %8d files \r", $n unless (!$opt_progress_rate or ($n % $opt_progress_rate)); $rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1; next unless scalar @{$files_by_size{$bytes}} > 1; foreach my $F (different_files(\@{$files_by_size{$bytes}}, $rh_Err , $raa_errors , $rh_ignored ) ) { $rh_unique_source_file->{$F} = 1; } } } # 1}}} sub files { # {{{1 # invoked by File::Find's find() Populates global variable @file_list if ($opt_exclude_dir) { my $return = 0; foreach my $skip_dir (keys %Exclude_Dir) { if ($File::Find::dir =~ m{/$skip_dir(/|$)} ) { $Ignored{$File::Find::name} = "--exclude_dir=$skip_dir"; $return = 1; last; } } return if $return; } my $nBytes = -s $_ ; if (!$nBytes and $opt_v > 5) { printf "files(%s) zero size\n", $File::Find::name; } return unless $nBytes ; # attempting other tests w/pipe or socket will hang my $is_dir = is_dir($_); my $is_bin = -B $_ ; printf "files(%s) size=%d is_dir=%d -B=%d\n", $File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5; $is_bin = 0 if $opt_unicode and unicode_file($_); return if $is_dir or $is_bin; ++$nFiles_Found; printf "%8d files\r", $nFiles_Found unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate)); push @file_list, $File::Find::name; } # 1}}} sub archive_files { # {{{1 # invoked by File::Find's find() Populates global variable @binary_archive foreach my $ext (keys %Known_Binary_Archives) { push @binary_archive, $File::Find::name if $File::Find::name =~ m{$ext$}; } } # 1}}} sub is_file { # {{{1 # portable method to test if item is a file # (-f doesn't work in ActiveState Perl on Windows) my $item = shift @_; if ($ON_WINDOWS) { my $mode = (stat $item)[2]; $mode = 0 unless $mode; if ($mode & 0100000) { return 1; } else { return 0; } } else { return (-f $item); # works on Unix, Linux, CygWin, z/OS } } # 1}}} sub is_dir { # {{{1 # portable method to test if item is a directory # (-d doesn't work in ActiveState Perl on Windows) my $item = shift @_; if ($ON_WINDOWS) { my $mode = (stat $item)[2]; $mode = 0 unless $mode; if ($mode & 0040000) { return 1; } else { return 0; } } else { return (-d $item); # works on Unix, Linux, CygWin, z/OS } } # 1}}} sub classify_file { # {{{1 my ($full_file , # in $rh_Err , # in hash of error codes $raa_errors , # out $rh_ignored , # out ) = @_; print "-> classify_file($full_file)\n" if $opt_v > 2; my $language = "(unknown)"; my $look_at_first_line = 0; my $file = basename $full_file; return $language if $Not_Code_Filename{$file}; # (unknown) return $language if $file =~ m{~$}; # a temp edit file (unknown) if ($file =~ /\.(\w+)$/) { # has an extension print "$full_file extension=[$1]\n" if $opt_v > 2; my $extension = $1; if ($Not_Code_Extension{$extension} and !$Forced_Extension{$extension}) { # If .1 (for example) is an extention that would ordinarily be # ignored but the user has insisted this be counted with the # --force-lang option, then go ahead and count it. $rh_ignored->{$full_file} = 'listed in $Not_Code_Extension{' . $extension . '}'; return $language; } if (defined $Language_by_Extension{$extension}) { if ($Language_by_Extension{$extension} eq 'MATLAB/Objective C/MUMPS') { my $lang_M_or_O = ""; matlab_or_objective_C($full_file , $rh_Err , $raa_errors, \$lang_M_or_O); if ($lang_M_or_O) { return $lang_M_or_O; } else { # an error happened in matlab_or_objective_C() $rh_ignored->{$full_file} = 'failure in matlab_or_objective_C()'; return $language; # (unknown) } } else { return $Language_by_Extension{$extension}; } } else { # has an unmapped file extension $look_at_first_line = 1; } } elsif (defined $Language_by_File{lc $file}) { return $Language_by_File{lc $file}; } else { # no file extension $look_at_first_line = 1; } if ($look_at_first_line) { # maybe it is a shell/Perl/Python/Ruby/etc script that # starts with pound bang: # #!/usr/bin/perl # #!/usr/bin/env perl my $script_language = peek_at_first_line($full_file , $rh_Err , $raa_errors); if (!$script_language) { $rh_ignored->{$full_file} = "language unknown (#2)"; # returns (unknown) } if (defined $Language_by_Script{$script_language}) { if (defined $Filters_by_Language{ $Language_by_Script{$script_language}}) { $language = $Language_by_Script{$script_language}; } else { $rh_ignored->{$full_file} = "undefined: Filters_by_Language{" . $Language_by_Script{$script_language} . "} for scripting language $script_language"; # returns (unknown) } } else { $rh_ignored->{$full_file} = "language unknown (#3)"; # returns (unknown) } } print "<- classify_file($full_file)\n" if $opt_v > 2; return $language; } # 1}}} sub peek_at_first_line { # {{{1 my ($file , # in $rh_Err , # in hash of error codes $raa_errors , # out ) = @_; print "-> peek_at_first_line($file)\n" if $opt_v > 2; my $script_language = ""; if (!-r $file) { push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file]; return $script_language; } my $IN = new IO::File $file, "r"; if (!defined $IN) { push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file]; print "<- peek_at_first_line($file)\n" if $opt_v > 2; return $script_language; } chomp(my $first_line = <$IN>); if (defined $first_line) { #print "peek_at_first_line of [$file] first_line=[$first_line]\n"; if ($first_line =~ /^#\!\s*(\S.*?)$/) { #print "peek_at_first_line 1=[$1]\n"; my @pound_bang = split(' ', $1); #print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n"; if (basename($pound_bang[0]) eq "env" and scalar @pound_bang > 1) { $script_language = $pound_bang[1]; #print "peek_at_first_line pound_bang A $pound_bang[1]\n"; } else { $script_language = basename $pound_bang[0]; #print "peek_at_first_line pound_bang B $script_language\n"; } } } $IN->close; print "<- peek_at_first_line($file)\n" if $opt_v > 2; return $script_language; } # 1}}} sub different_files { # {{{1 # See which of the given files are unique by computing each file's MD5 # sum. Return the subset of files which are unique. my ($ra_files , # in $rh_Err , # in $raa_errors , # out $rh_ignored , # out ) = @_; print "-> different_files(@{$ra_files})\n" if $opt_v > 2; my %file_hash = (); # file_hash{ md5 hash } = file name foreach my $F (@{$ra_files}) { next if is_dir($F); # needed for Windows my $IN = new IO::File $F, "r"; if (!defined $IN) { push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F]; $rh_ignored->{$F} = 'cannot read'; } else { if ($HAVE_Digest_MD5) { binmode $IN; $file_hash{ Digest::MD5->new->addfile($IN)->hexdigest } = $F; } else { # all files treated unique $file_hash{ $F } = $F; } $IN->close; } } my @unique = values %file_hash; print "<- different_files(@unique)\n" if $opt_v > 2; return @unique; } # 1}}} sub call_counter { # {{{1 my ($file , # in $language, # in ) = @_; # Logic: pass the file through the following filters: # 1. remove blank lines # 2. remove comments using each filter defined for this language # (example: SQL has two, remove_starts_with(--) and # remove_c_comments() ) # 3. compute comment lines as # total lines - blank lines - lines left over after all # comment filters have been applied print "-> call_counter($file, $language)\n" if $opt_v > 2; #print "call_counter: ", Dumper(@routines), "\n"; my @lines = (); my $ascii = ""; if (-B $file and $opt_unicode) { # was binary so must be unicode $/ = undef; my $IN = new IO::File $file, "r"; my $bin_text = <$IN>; $IN->close; $/ = "\n"; $ascii = unicode_to_ascii( $bin_text ); @lines = split("\n", $ascii ); foreach (@lines) { $_ = "$_\n"; } } else { # regular text file @lines = read_file($file); $ascii = join('', @lines); } my @original_lines = @lines; my $total_lines = scalar @lines; print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages; @lines = rm_blanks(\@lines, $language); # remove blank lines my $blank_lines = $total_lines - scalar @lines; print_lines($file, "Blank lines removed:", \@lines) if $opt_print_filter_stages; @lines = rm_comments(\@lines, $language, $file); my $comment_lines = $total_lines - $blank_lines - scalar @lines; if ($opt_strip_comments) { my $stripped_file = basename $file . ".$opt_strip_comments"; write_file($stripped_file, @lines); } if ($opt_html) { chomp(@original_lines); # includes blank lines, comments chomp(@lines); # no blank lines, no comments my (@diff_L, @diff_R, %count); # remove blank lines to get better quality diffs; count # blank lines separately my @original_lines_minus_white = (); # however must keep track of how many blank lines were removed and # where they were removed so that the HTML display can include it my %blank_line = (); my $insert_line = 0; foreach (@original_lines) { if (/^\s*$/) { ++$count{blank}{same}; ++$blank_line{ $insert_line }; } else { ++$insert_line; push @original_lines_minus_white, $_; } } file_diff(\@original_lines_minus_white , # in \@lines , # in "comment" , # in \@diff_L, \@diff_R, \%count); # out write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line); #print Dumper("count", \%count); } print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n" if $opt_v > 2; return ($total_lines, $blank_lines, $comment_lines); } # 1}}} sub write_file { # {{{1 my ($file , # in @lines , # in ) = @_; print "-> write_file($file)\n" if $opt_v > 2; # Create the destination directory if it doesn't already exist. my $abs_file_path = File::Spec->rel2abs( $file ); my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path ); mkpath($volume . $directories, 1, 0777); my $OUT = new IO::File $file, "w"; if (defined $OUT) { chomp(@lines); print $OUT join("\n", @lines), "\n"; $OUT->close; } else { warn "Unable to write to $file\n"; } print "Wrote $file\n"; print "<- write_file\n" if $opt_v > 2; } # 1}}} sub read_file { # {{{1 my ($file, ) = @_; print "-> read_file($file)\n" if $opt_v > 2; my @lines = (); my $IN = new IO::File $file, "r"; if (defined $IN) { @lines = <$IN>; $IN->close; # Some files don't end with a new line. Force this: $lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/; } else { warn "Unable to read $file\n"; } print "<- read_file\n" if $opt_v > 2; return @lines; } # 1}}} sub rm_blanks { # {{{1 my ($ra_in, $language) = @_; my @out = (); if ($language eq "COBOL") { @out = remove_cobol_blanks($ra_in); } else { @out = remove_matches($ra_in, '^\s*$'); # removes blank lines } return @out; } # 1}}} sub rm_comments { # {{{1 my ($ra_lines , # in, must be free of blank lines $language , # in $file , # in (some language counters, eg Haskell, need # access to the original file) ) = @_; my @routines = @{$Filters_by_Language{$language}}; my @lines = @{$ra_lines}; my @original_lines = @{$ra_lines}; foreach my $call_string (@routines) { my $subroutine = $call_string->[0]; if (! defined &{$subroutine}) { warn "rm_comments undefined subroutine $subroutine for $file\n"; next; } print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1; my @args = @{$call_string}; shift @args; # drop the subroutine name if (@args and $args[0] eq '>filename<') { shift @args; unshift @args, $file; } no strict 'refs'; @lines = &{$subroutine}(\@lines, @args); # apply filter... print_lines($file, "After $subroutine(@args)", \@lines) if $opt_print_filter_stages; @lines = remove_matches(\@lines, '^\s*$'); # then remove blank lines print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines) if $opt_print_filter_stages; } # Exception for scripting languages: treat the first #! line as code. # Will need to add it back in if it was removed earlier. if ($Script_Language{$language} and $original_lines[0] =~ /^#!/ and $lines[0] ne $original_lines[0]) { unshift @lines, $original_lines[0]; # add the first line back } return @lines; } # 1}}} sub remove_f77_comments { # {{{1 my ($ra_lines, ) = @_; print "-> remove_f77_comments\n" if $opt_v > 2; my @save_lines = (); foreach (@{$ra_lines}) { next if m{^[*cC]}; push @save_lines, $_; } print "<- remove_f77_comments\n" if $opt_v > 2; return @save_lines; } # 1}}} sub remove_f90_comments { # {{{1 # derived from SLOCCount my ($ra_lines, ) = @_; print "-> remove_f90_comments\n" if $opt_v > 2; my @save_lines = (); foreach (@{$ra_lines}) { # a comment is m/^\s*!/ # an empty line is m/^\s*$/ # a HPF statement is m/^\s*!hpf\$/i # an Open MP statement is m/^\s*!omp\$/i if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) { push @save_lines, $_; } } print "<- remove_f90_comments\n" if $opt_v > 2; return @save_lines; } # 1}}} sub remove_matches { # {{{1 my ($ra_lines, # in $pattern , # in Perl regular expression (case insensitive) ) = @_; print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2; my @save_lines = (); foreach (@{$ra_lines}) { #chomp; print "remove_matches [$pattern] [$_]\n"; next if m{$pattern}i; push @save_lines, $_; } print "<- remove_matches\n" if $opt_v > 2; #print "remove_matches returning\n ", join("\n ", @save_lines), "\n"; return @save_lines; } # 1}}} sub remove_above { # {{{1 my ($ra_lines, $marker, ) = @_; print "-> remove_above(marker=$marker)\n" if $opt_v > 2; # Make two passes through the code: # 1. check if the marker exists # 2. remove anything above the marker if it exists, # do nothing if the marker does not exist # Pass 1 my $found_marker = 0; for (my $line_number = 1; $line_number <= scalar @{$ra_lines}; $line_number++) { if ($ra_lines->[$line_number-1] =~ m{$marker}) { $found_marker = $line_number; last; } } # Pass 2 only if needed my @save_lines = (); if ($found_marker) { my $n = 1; foreach (@{$ra_lines}) { push @save_lines, $_ if $n >= $found_marker; ++$n; } } else { # marker wasn't found; save all lines foreach (@{$ra_lines}) { push @save_lines, $_; } } print "<- remove_above\n" if $opt_v > 2; return @save_lines; } # 1}}} sub remove_below { # {{{1 my ($ra_lines, $marker, ) = @_; print "-> remove_below(marker=$marker)\n" if $opt_v > 2; my @save_lines = (); foreach (@{$ra_lines}) { last if m{$marker}; push @save_lines, $_; } print "<- remove_below\n" if $opt_v > 2; return @save_lines; } # 1}}} sub remove_between { # {{{1 my ($ra_lines, $marker, ) = @_; # $marker must contain one of the balanced pairs understood # by Regexp::Common::balanced, namely # '{}' '()' '[]' or '<>' print "-> remove_between(marker=$marker)\n" if $opt_v > 2; my %acceptable = ('{}'=>1, '()'=>1, '[]'=>1, '<>'=>1, ); die "remove_between: invalid delimiter '$marker'\n", "the delimiter must be one of these four pairs:\n", "{} () [] <>\n" unless $acceptable{$marker}; Install_Regexp_Common() unless $HAVE_Rexexp_Common; my $all_lines = join("", @{$ra_lines}); no strict 'vars'; # otherwise get: # Global symbol "%RE" requires explicit package name at cloc line xx. if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) { no warnings; $all_lines =~ s/$1//g; } print "<- remove_between\n" if $opt_v > 2; return split("\n", $all_lines); } # 1}}} sub remove_cobol_blanks { # {{{1 # subroutines derived from SLOCCount my ($ra_lines, ) = @_; my $free_format = 0; # Support "free format" source code. my @save_lines = (); foreach (@{$ra_lines}) { next if m/^\s*$/; my $line = expand($_); # convert tabs to equivalent spaces $free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i; if ($free_format) { push @save_lines, $_; } else { push @save_lines, $_ unless m/^\d{6}\s*$/ or ($line =~ m/^\d{6}\s{66}/); } } return @save_lines; } # 1}}} sub remove_cobol_comments { # {{{1 # subroutines derived from SLOCCount my ($ra_lines, ) = @_; my $free_format = 0; # Support "free format" source code. my @save_lines = (); foreach (@{$ra_lines}) { if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;} if ($free_format) { push @save_lines, $_ unless m{^\s*\*}; } else { push @save_lines, $_ unless m{^......\*} or m{^\*}; } } return @save_lines; } # 1}}} sub remove_jcl_comments { # {{{1 my ($ra_lines, ) = @_; print "-> remove_jcl_comments\n" if $opt_v > 2; my @save_lines = (); my $in_comment = 0; foreach (@{$ra_lines}) { next if /^\s*$/; next if m{^\s*//\*}; last if m{^\s*//\s*$}; push @save_lines, $_; } print "<- remove_jcl_comments\n" if $opt_v > 2; return @save_lines; } # 1}}} sub remove_jsp_comments { # {{{1 # JSP comment is <%-- body of comment --%> my ($ra_lines, ) = @_; print "-> remove_jsp_comments\n" if $opt_v > 2; my @save_lines = (); my $in_comment = 0; foreach (@{$ra_lines}) { next if /^\s*$/; s/<\%\-\-.*?\-\-\%>//g; # strip one-line comments next if /^\s*$/; if ($in_comment) { if (/\-\-\%>/) { s/^.*?\-\-\%>//; $in_comment = 0; } } next if /^\s*$/; $in_comment = 1 if /^(.*?)<\%\-\-/; next if defined $1 and $1 =~ /^\s*$/; next if ($in_comment); push @save_lines, $_; } print "<- remove_jsp_comments\n" if $opt_v > 2; return @save_lines; } # 1}}} sub remove_html_comments { # {{{1 # HTML comment is # Need to use my own routine until the HTML comment regex in # the Regexp::Common module can handle my ($ra_lines, ) = @_; print "-> remove_html_comments\n" if $opt_v > 2; my @save_lines = (); my $in_comment = 0; foreach (@{$ra_lines}) { next if /^\s*$/; s///g; # strip one-line comments next if /^\s*$/; if ($in_comment) { if (/\-\->/) { s/^.*?\-\->//; $in_comment = 0; } } next if /^\s*$/; $in_comment = 1 if /^(.*?) 2; return @save_lines; } # 1}}} sub determine_lit_type { # {{{1 my ($file) = @_; open (FILE, $file); while () { if (m/^\\begin{code}/) { close FILE; return 2; } if (m/^>\s/) { close FILE; return 1; } } return 0; } # 1}}} sub remove_haskell_comments { # {{{1 # Bulk of code taken from SLOCCount's haskell_count script. # Strips out {- .. -} and -- comments and counts the rest. # Pragmas, {-#...}, are counted as SLOC. # BUG: Doesn't handle strings with embedded block comment markers gracefully. # In practice, that shouldn't be a problem. my ($ra_lines, $file, ) = @_; print "-> remove_haskell_comments\n" if $opt_v > 2; my @save_lines = (); my $in_comment = 0; my $incomment = 0; my ($literate, $inlitblock) = (0,0); $literate = 1 if $file =~ /\.lhs$/; if($literate) { $literate = determine_lit_type($file) } foreach (@{$ra_lines}) { if ($literate == 1) { if (!s/^>//) { s/.*//; } } elsif ($literate == 2) { if ($inlitblock) { if (m/^\\end{code}/) { s/.*//; $inlitblock = 0; } } elsif (!$inlitblock) { if (m/^\\begin{code}/) { s/.*//; $inlitblock = 1; } else { s/.*//; } } } if ($incomment) { if (m/\-\}/) { s/^.*?\-\}//; $incomment = 0;} else { s/.*//; } } if (!$incomment) { s/--.*//; s!{-[^#].*?-}!!g; if (m/{-/ && (!m/{-#/)) { s/{-.*//; $incomment = 1; } } if (m/\S/) { push @save_lines, $_; } } # if ($incomment) {print "ERROR: ended in comment in $ARGV\n";} print "<- remove_haskell_comments\n" if $opt_v > 2; return @save_lines; } # 1}}} sub print_lines { # {{{1 my ($file , # in $title , # in $ra_lines , # in ) = @_; printf "->%-30s %s\n", $file, $title; for (my $i = 0; $i < scalar @{$ra_lines}; $i++) { printf "%5d | %s", $i+1, $ra_lines->[$i]; print "\n" unless $ra_lines->[$i] =~ m{\n$} } } # 1}}} sub set_constants { # {{{1 my ($rh_Language_by_Extension , # out $rh_Language_by_Script , # out $rh_Language_by_File , # out $rhaa_Filters_by_Language , # out $rh_Not_Code_Extension , # out $rh_Not_Code_Filename , # out $rh_Scale_Factor , # out $rh_Known_Binary_Archives , # out ) = @_; # 1}}} %{$rh_Language_by_Extension} = ( # {{{1 'abap' => 'ABAP' , 'ac' => 'm4' , 'ada' => 'Ada' , 'adb' => 'Ada' , 'ads' => 'Ada' , 'adso' => 'ADSO/IDSM' , 'am' => 'make' , 'ample' => 'AMPLE' , 'dofile' => 'AMPLE' , 'startup' => 'AMPLE' , 'asa' => 'ASP' , 'asax' => 'ASP.Net' , 'ascx' => 'ASP.Net' , 'asm' => 'Assembler' , 'asmx' => 'ASP.Net' , 'asp' => 'ASP' , 'aspx' => 'ASP.Net' , 'master' => 'ASP.Net' , 'sitemap' => 'ASP.Net' , 'awk' => 'awk' , 'bash' => 'Bourne Again Shell' , 'bas' => 'Visual Basic' , 'bat' => 'DOS Batch' , 'BAT' => 'DOS Batch' , 'cbl' => 'COBOL' , 'CBL' => 'COBOL' , 'c' => 'C' , 'C' => 'C++' , 'cc' => 'C++' , 'ccs' => 'CCS' , 'cfm' => 'ColdFusion' , 'cl' => 'Lisp' , 'cls' => 'Visual Basic' , 'cob' => 'COBOL' , 'COB' => 'COBOL' , 'config' => 'ASP.Net' , 'cpp' => 'C++' , 'cs' => 'C#' , 'csh' => 'C Shell' , 'css' => "CSS" , 'cxx' => 'C++' , 'da' => 'DAL' , 'def' => 'Teamcenter def' , 'dmap' => 'NASTRAN DMAP' , 'dpr' => 'Pascal' , 'dtd' => 'DTD' , 'ec' => 'C' , 'el' => 'Lisp' , 'exp' => 'Expect' , 'f77' => 'Fortran 77' , 'F77' => 'Fortran 77' , 'f90' => 'Fortran 90' , 'F90' => 'Fortran 90' , 'f95' => 'Fortran 95' , 'F95' => 'Fortran 95' , 'f' => 'Fortran 77' , 'F' => 'Fortran 77' , 'fmt' => 'Oracle Forms' , 'focexec' => 'Focus' , 'frm' => 'Visual Basic' , 'gnumakefile' => 'make' , 'Gnumakefile' => 'make' , 'h' => 'C/C++ Header' , 'H' => 'C/C++ Header' , 'hh' => 'C/C++ Header' , 'hpp' => 'C/C++ Header' , 'hs' => 'Haskell' , 'htm' => 'HTML' , 'html' => 'HTML' , 'i3' => 'Modula3' , 'idl' => 'IDL' , 'ig' => 'Modula3' , 'il' => 'SKILL' , 'ils' => 'SKILL++' , 'inc' => 'inc' , # might be PHP 'itk' => 'Tcl/Tk' , 'java' => 'Java' , 'jcl' => 'JCL' , # IBM Job Control Lang. 'jl' => 'Lisp' , 'js' => 'Javascript' , 'jsp' => 'JSP' , # Java server pages 'ksh' => 'Korn Shell' , 'lhs' => 'Haskell' , 'l' => 'lex' , 'lsp' => 'Lisp' , 'lua' => 'Lua' , 'm3' => 'Modula3' , 'm4' => 'm4' , 'makefile' => 'make' , 'Makefile' => 'make' , 'met' => 'Teamcenter met' , 'mg' => 'Modula3' , 'mli' => 'ML' , 'ml' => 'ML' , 'm' => 'MATLAB/Objective C/MUMPS' , 'wdproj' => 'MSBuild scripts' , 'csproj' => 'MSBuild scripts' , 'mps' => 'MUMPS' , 'mth' => 'Teamcenter mth' , 'oscript' => 'LiveLink OScript' , 'pad' => 'Ada' , # Oracle Ada preprocessor 'pas' => 'Pascal' , 'pcc' => 'C++' , # Oracle C++ preprocessor 'perl' => 'Perl' , 'pfo' => 'Fortran 77' , 'pgc' => 'C' , # Postgres embedded C/C++ 'php3' => 'PHP' , 'php4' => 'PHP' , 'php5' => 'PHP' , 'php' => 'PHP' , 'plh' => 'Perl' , 'pl' => 'Perl' , 'PL' => 'Perl' , 'plx' => 'Perl' , 'pm' => 'Perl' , 'p' => 'Pascal' , 'pp' => 'Pascal' , 'psql' => 'SQL' , 'py' => 'Python' , 'rb' => 'Ruby' , # 'resx' => 'ASP.Net' , 'rex' => 'Oracle Reports' , 'rexx' => 'Rexx' , 's' => 'Assembler' , 'S' => 'Assembler' , 'sbl' => 'Softbridge Basic' , 'SBL' => 'Softbridge Basic' , 'sc' => 'Lisp' , 'scm' => 'Lisp' , 'sed' => 'sed' , 'ses' => 'Patran Command Language' , 'pcl' => 'Patran Command Language' , 'sh' => 'Bourne Shell' , 'sql' => 'SQL' , 'SQL' => 'SQL' , 'tcl' => 'Tcl/Tk' , 'tcsh' => 'C Shell' , 'tk' => 'Tcl/Tk' , 'vba' => 'Visual Basic' , 'VBA' => 'Visual Basic' , # 'vbp' => 'Visual Basic' , # .vbp - autogenerated 'vb' => 'Visual Basic' , 'VB' => 'Visual Basic' , # 'vbw' => 'Visual Basic' , # .vbw - autogenerated 'vbs' => 'Visual Basic' , 'VBS' => 'Visual Basic' , 'webinfo' => 'ASP.Net' , 'xml' => 'XML' , 'XML' => 'XML' , 'build' => 'NAnt scripts' , 'vim' => 'vim script' , 'xsd' => 'XSD' , 'XSD' => 'XSD' , 'xslt' => 'XSLT' , 'XSLT' => 'XSLT' , 'xsl' => 'XSLT' , 'XSL' => 'XSLT' , 'y' => 'yacc' , 'yaml' => 'YAML' , 'yml' => 'YAML' , ); # 1}}} %{$rh_Language_by_Script} = ( # {{{1 'awk' => 'awk' , 'bash' => 'Bourne Again Shell' , 'bc' => 'bc' ,# calculator 'csh' => 'C Shell' , 'idl' => 'IDL' , 'ksh' => 'Korn Shell' , 'lua' => 'Lua' , 'make' => 'make' , 'octave' => 'Octave' , 'perl5' => 'Perl' , 'perl' => 'Perl' , 'ruby' => 'Ruby' , 'sed' => 'sed' , 'sh' => 'Bourne Shell' , 'tcl' => 'Tcl/Tk' , 'tclsh' => 'Tcl/Tk' , 'tcsh' => 'C Shell' , 'wish' => 'Tcl/Tk' , ); # 1}}} %{$rh_Language_by_File} = ( # {{{1 'Makefile' => 'make' , 'makefile' => 'make' , 'gnumakefile' => 'make' , 'Gnumakefile' => 'make' , ); # 1}}} %{$rhaa_Filters_by_Language} = ( # {{{1 'ABAP' => [ [ 'remove_matches' , '^\*' ], ], 'ASP' => [ [ 'remove_matches' , '^\s*\47'], ], # \47 = ' 'ASP.Net' => [ [ 'call_regexp_common' , 'C' ], ], 'Ada' => [ [ 'remove_matches' , '^\s*--' ], ], 'ADSO/IDSM' => [ [ 'remove_matches' , '^\s*\*[\+\!]' ], ], 'AMPLE' => [ [ 'remove_matches' , '^\s*//' ], ], 'Assembler' => [ [ 'remove_matches' , '^\s*//' ], [ 'remove_matches' , '^\s*;' ], [ 'call_regexp_common' , 'C' ], ], 'awk' => [ [ 'remove_matches' , '^\s*#' ], ], 'bc' => [ [ 'remove_matches' , '^\s*#' ], ], 'C' => [ [ 'remove_matches' , '^\s*//' ], # C99 [ 'call_regexp_common' , 'C' ], ], 'C++' => [ [ 'remove_matches' , '^\s*//' ], [ 'call_regexp_common' , 'C' ], ], 'C/C++ Header' => [ [ 'remove_matches' , '^\s*//' ], [ 'call_regexp_common' , 'C' ], ], 'C#' => [ [ 'remove_matches' , '^\s*//' ], [ 'call_regexp_common' , 'C' ], ], 'CCS' => [ [ 'call_regexp_common' , 'C' ], ], 'CSS' => [ [ 'call_regexp_common' , 'C' ], ], 'COBOL' => [ [ 'remove_cobol_comments', ], ], 'ColdFusion' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], 'Crystal Reports' => [ [ 'remove_matches' , '^\s*//' ], ], 'DAL' => [ [ 'remove_between' , '[]', ], ], 'NASTRAN DMAP' => [ [ 'remove_matches' , '^\s*\$' ], ], 'DOS Batch' => [ [ 'remove_matches' , '^\s*rem', ], ], 'DTD' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], 'Expect' => [ [ 'remove_matches' , '^\s*#' ], ], 'Focus' => [ [ 'remove_matches' , '^\s*\-\*' ], ], 'Fortran 77' => [ [ 'remove_f77_comments' , ], ], 'Fortran 90' => [ [ 'remove_f77_comments' , ], [ 'remove_f90_comments' , ], ], 'Fortran 95' => [ [ 'remove_f77_comments' , ], [ 'remove_f90_comments' , ], ], 'HTML' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], 'Haskell' => [ [ 'remove_haskell_comments', '>filename<' ], ], 'IDL' => [ [ 'remove_matches' , '^\s*;' ], ], 'JSP' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], [ 'remove_jsp_comments', ], ], 'Java' => [ [ 'remove_matches' , '^\s*//' ], [ 'call_regexp_common' , 'C' ], ], 'Javascript' => [ [ 'remove_matches' , '^\s*//' ], [ 'call_regexp_common' , 'C' ], ], 'JCL' => [ [ 'remove_jcl_comments' , ], ], 'Lisp' => [ [ 'remove_matches' , '^\s*;' ], ], 'LiveLink OScript' => [ [ 'remove_matches' , '^\s*//' ], ], # 'Lua' => [ [ 'call_regexp_common' , 'lua' ], ], 'Lua' => [ [ 'remove_matches' , '^\s*\-\-' ], ], 'make' => [ [ 'remove_matches' , '^\s*#' ], ], 'MATLAB' => [ [ 'remove_matches' , '^\s*%' ], ], 'Modula3' => [ [ 'call_regexp_common' , 'Pascal' ], ], # Modula 3 comments are (* ... *) so applying the Pascal filter # which also treats { ... } as a comment is not really correct. 'Objective C' => [ [ 'remove_matches' , '^\s*//' ], [ 'call_regexp_common' , 'C' ], ], 'MATLAB/Objective C/MUMPS' => [ [ 'die' , ], ], # never called 'MUMPS' => [ [ 'remove_matches' , '^\s*;' ], ], 'Octave' => [ [ 'remove_matches' , '^\s*#' ], ], 'Oracle Forms' => [ [ 'call_regexp_common' , 'C' ], ], 'Oracle Reports' => [ [ 'call_regexp_common' , 'C' ], ], 'Pascal' => [ [ 'call_regexp_common' , 'Pascal' ], ], 'Patran Command Language'=> [ [ 'remove_matches' , '^\s*#' ], [ 'remove_matches' , '^\s*\$#' ], [ 'call_regexp_common' , 'C' ], ], 'Perl' => [ [ 'remove_below' , '^__(END|DATA)__'], [ 'remove_matches' , '^\s*#' ], ], 'Python' => [ [ 'remove_matches' , '^\s*#' ], ], 'PHP' => [ [ 'remove_matches' , '^\s*#' ], [ 'remove_matches' , '^\s*//' ], [ 'call_regexp_common' , 'C' ], ], 'Rexx' => [ [ 'call_regexp_common' , 'C' ], ], 'Ruby' => [ [ 'remove_matches' , '^\s*#' ], ], 'SKILL' => [ [ 'remove_matches' , '^\s*;' ], [ 'call_regexp_common' , 'C' ], ], 'SKILL++' => [ [ 'remove_matches' , '^\s*;' ], [ 'call_regexp_common' , 'C' ], ], 'SQL' => [ [ 'remove_matches' , '^\s*--' ], [ 'call_regexp_common' , 'C' ], ], 'sed' => [ [ 'remove_matches' , '^\s*#' ], ], 'Bourne Again Shell' => [ [ 'remove_matches' , '^\s*#' ], ], 'Bourne Shell' => [ [ 'remove_matches' , '^\s*#' ], ], 'm4' => [ [ 'remove_matches' , '^dnl ' ], ], 'C Shell' => [ [ 'remove_matches' , '^\s*#' ], ], 'Korn Shell' => [ [ 'remove_matches' , '^\s*#' ], ], 'Tcl/Tk' => [ [ 'remove_matches' , '^\s*#' ], ], 'Teamcenter def' => [ [ 'remove_matches' , '^\s*#' ], ], 'Teamcenter met' => [ [ 'call_regexp_common' , 'C' ], ], 'Teamcenter mth' => [ [ 'remove_matches' , '^\s*#' ], ], 'Softbridge Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ], [ 'remove_matches' , '^\s*Attribute\s+'], [ 'remove_matches' , '^\s*\47'], ], # \47 = ' 'vim script' => [ [ 'remove_matches' , '^\s*"' ], ], 'Visual Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ], [ 'remove_matches' , '^\s*Attribute\s+'], [ 'remove_matches' , '^\s*\47'], ], # \47 = ' 'yacc' => [ [ 'call_regexp_common' , 'C' ], ], 'YAML' => [ [ 'remove_matches' , '^\s*#' ], ], 'lex' => [ [ 'call_regexp_common' , 'C' ], ], 'XML' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], 'XSD' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], 'XSLT' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], 'NAnt scripts' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], 'MSBuild scripts' => [ [ 'remove_html_comments', ], [ 'call_regexp_common' , 'HTML' ], ], ); # 1}}} %{$rh_Not_Code_Extension} = ( # {{{1 '1' => 1, # Man pages (documentation): '2' => 1, '3' => 1, '4' => 1, '5' => 1, '6' => 1, '7' => 1, '8' => 1, '9' => 1, 'a' => 1, # Static object code. 'ad' => 1, # X application default resource file. 'afm' => 1, # font metrics 'arc' => 1, # arc(1) archive 'arj' => 1, # arj(1) archive 'au' => 1, # Audio sound filearj(1) archive 'bak' => 1, # Backup files - we only want to count the "real" files. 'bdf' => 1, 'bmp' => 1, 'bz2' => 1, # bzip2(1) compressed file 'csv' => 1, # comma separated values 'desktop' => 1, 'dic' => 1, 'doc' => 1, 'elc' => 1, 'eps' => 1, 'fig' => 1, 'gif' => 1, 'gz' => 1, 'hdf' => 1, # hierarchical data format 'in' => 1, # Debatable. 'jpg' => 1, 'kdelnk' => 1, 'man' => 1, 'mf' => 1, 'mp3' => 1, 'n' => 1, 'o' => 1, # Object code is generated from source code. 'pbm' => 1, 'pdf' => 1, 'pfb' => 1, 'png' => 1, 'po' => 1, 'ps' => 1, # Postscript is _USUALLY_ generated automatically. 'sgm' => 1, 'sgml' => 1, 'so' => 1, # Dynamically-loaded object code. 'Tag' => 1, 'tex' => 1, 'text' => 1, 'tfm' => 1, 'tgz' => 1, # gzipped tarball 'tiff' => 1, 'txt' => 1, 'vf' => 1, 'wav' => 1, 'xbm' => 1, 'xpm' => 1, 'Y' => 1, # file compressed with "Yabba" 'Z' => 1, # file compressed with "compress" 'zip' => 1, # zip archive ); # 1}}} %{$rh_Not_Code_Filename} = ( # {{{1 'AUTHORS' => 1, 'README' => 1, 'Readme' => 1, 'readme' => 1, 'README.tk' => 1, # used in kdemultimedia, it's confusing. 'Changelog' => 1, 'ChangeLog' => 1, 'Repository' => 1, 'CHANGES' => 1, 'Changes' => 1, '.cvsignore' => 1, 'Root' => 1, # CVS 'BUGS' => 1, 'TODO' => 1, 'COPYING' => 1, 'MAINTAINERS' => 1, 'Entries' => 1, 'iconfig.h' => 1, # Skip "iconfig.h" files; they're used in Imakefiles # (used in xlockmore): ); # 1}}} %{$rh_Scale_Factor} = ( # {{{1 '1032/af' => 5.00, '1st generation default' => 0.25, '2nd generation default' => 0.75, '3rd generation default' => 1.00, '4th generation default' => 4.00, '5th generation default' => 16.00, 'aas macro' => 0.88, 'abap/4' => 5.00, 'ABAP' => 5.00, 'accel' => 4.21, 'access' => 2.11, 'actor' => 3.81, 'acumen' => 2.86, 'Ada' => 0.52, 'Ada 83' => 1.13, 'Ada 95' => 1.63, 'adr/dl' => 2.00, 'adr/ideal/pdl' => 4.00, 'ads/batch' => 4.00, 'ads/online' => 4.00, 'ADSO/IDSM' => 3.00, 'advantage' => 2.11, 'ai shell default' => 1.63, 'ai shells' => 1.63, 'algol 68' => 0.75, 'algol w' => 0.75, 'ambush' => 2.50, 'aml' => 1.63, 'AMPLE' => 2.00, 'amppl ii' => 1.25, 'ansi basic' => 1.25, 'ansi cobol 74' => 0.75, 'ansi cobol 85' => 0.88, 'SQL' => 6.15, 'answer/db' => 6.15, 'apl 360/370' => 2.50, 'apl default' => 2.50, 'apl*plus' => 2.50, 'applesoft basic' => 0.63, 'application builder' => 4.00, 'application manager' => 2.22, 'aps' => 0.96, 'aps' => 4.71, 'apt' => 1.13, 'aptools' => 4.00, 'arc' => 1.63, 'ariel' => 0.75, 'arity' => 1.63, 'arity prolog' => 1.25, 'art' => 1.63, 'art enterprise' => 1.74, 'artemis' => 2.00, 'artim' => 1.74, 'as/set' => 4.21, 'asi/inquiry' => 6.15, 'ask windows' => 1.74, 'asa' => 1.29, 'ASP' => 1.29, 'ASP.Net' => 1.29, 'aspx' => 1.29, #'resx' => 1.29, 'asax' => 1.29, 'ascx' => 1.29, 'asmx' => 1.29, 'config' => 1.29, 'webinfo' => 1.29, 'CCS' => 5.33, # 'assembler (basic)' => 0.25, 'Assembler' => 0.25, 'assembler (macro)' => 0.51, 'associative default' => 1.25, 'autocoder' => 0.25, 'awk' => 3.81, 'aztec c' => 0.63, 'balm' => 0.75, 'base sas' => 1.51, 'basic' => 0.75, 'basic a' => 0.63, # 'basic assembly' => 0.25, 'bc' => 1.50, 'berkeley pascal' => 0.88, 'better basic' => 0.88, 'bliss' => 0.75, 'bmsgen' => 2.22, 'boeingcalc' => 13.33, 'bteq' => 6.15, 'C' => 0.77, 'c set 2' => 0.88, 'C#' => 1.36, 'C++' => 1.51, 'c86plus' => 0.63, 'cadbfast' => 2.00, 'caearl' => 2.86, 'cast' => 1.63, 'cbasic' => 0.88, 'cdadl' => 4.00, 'cellsim' => 1.74, 'ColdFusion' => 4.00, 'chili' => 0.75, 'chill' => 0.75, 'cics' => 1.74, 'clarion' => 1.38, 'clascal' => 1.00, 'cli' => 2.50, 'clipper' => 2.05, 'clipper db' => 2.00, 'clos' => 3.81, 'clout' => 2.00, 'cms2' => 0.75, 'cmsgen' => 4.21, 'COBOL' => 1.04, 'COBOL ii' => 0.75, 'COBOL/400' => 0.88, 'cobra' => 4.00, 'codecenter' => 2.22, 'cofac' => 2.22, 'cogen' => 2.22, 'cognos' => 2.22, 'cogo' => 1.13, 'comal' => 1.00, 'comit ii' => 1.25, 'common lisp' => 1.25, 'concurrent pascal' => 1.00, 'conniver' => 1.25, 'cool:gen/ief' => 2.58, 'coral 66' => 0.75, 'corvet' => 4.21, 'corvision' => 5.33, 'cpl' => 0.50, 'Crystal Reports' => 4.00, 'csl' => 1.63, 'csp' => 1.51, 'cssl' => 1.74, 'CSS' => 1.0, 'culprit' => 1.57, 'cxpert' => 1.63, 'cygnet' => 4.21, 'DAL' => 1.50, 'data base default' => 2.00, 'dataflex' => 2.00, 'datatrieve' => 4.00, 'dbase iii' => 2.00, 'dbase iv' => 1.54, 'dcl' => 0.38, 'decision support default' => 2.22, 'decrally' => 2.00, 'delphi' => 2.76, 'dl/1' => 2.00, 'NASTRAN DMAP' => 2.35, 'dna4' => 4.21, 'DOS Batch' => 0.63, 'dsp assembly' => 0.50, 'dtabl' => 1.74, 'dtipt' => 1.74, 'dyana' => 1.13, 'dynamoiii' => 1.74, 'easel' => 2.76, 'easy' => 1.63, 'easytrieve+' => 2.35, 'eclipse' => 1.63, 'eda/sql' => 6.67, 'edscheme 3.4' => 1.51, 'eiffel' => 3.81, 'enform' => 1.74, 'englishbased default' => 1.51, 'ensemble' => 2.76, 'epos' => 4.00, 'erlang' => 2.00, 'esf' => 2.00, 'espadvisor' => 1.63, 'espl/i' => 1.13, 'euclid' => 0.75, 'excel' => 1.74, 'excel 12' => 13.33, 'excel 34' => 13.33, 'excel 5' => 13.33, 'express' => 2.22, 'exsys' => 1.63, 'extended common lisp' => 1.43, 'eznomad' => 2.22, 'facets' => 4.00, 'factorylink iv' => 2.76, 'fame' => 2.22, 'filemaker pro' => 2.22, 'flavors' => 2.76, 'flex' => 1.74, 'flexgen' => 2.76, 'Focus' => 1.90, 'foil' => 1.51, 'forte' => 4.44, 'forth' => 1.25, 'Fortran 66' => 0.63, 'Fortran 77' => 0.75, 'Fortran 90' => 1.00, 'Fortran 95' => 1.13, 'Fortran II' => 0.63, 'foundation' => 2.76, 'foxpro' => 2.29, 'foxpro 1' => 2.00, 'foxpro 2.5' => 2.35, 'framework' => 13.33, 'g2' => 1.63, 'gamma' => 5.00, 'genascript' => 2.96, 'gener/ol' => 6.15, 'genexus' => 5.33, 'genifer' => 4.21, 'geode 2.0' => 5.00, 'gfa basic' => 2.35, 'gml' => 1.74, 'golden common lisp' => 1.25, 'gpss' => 1.74, 'guest' => 2.86, 'guru' => 1.63, 'gw basic' => 0.82, 'Haskell' => 2.11, 'high c' => 0.63, 'hlevel' => 1.38, 'hp basic' => 0.63, 'HTML' => 1.90 , 'XML' => 1.90 , 'XSLT' => 1.90 , 'DTD' => 1.90 , 'XSD' => 1.90 , 'NAnt scripts' => 1.90 , 'MSBuild scripts' => 1.90 , 'HTML 2' => 5.00, 'HTML 3' => 5.33, 'huron' => 5.00, 'ibm adf i' => 4.00, 'ibm adf ii' => 4.44, 'ibm advanced basic' => 0.82, 'ibm cics/vs' => 2.00, 'ibm compiled basic' => 0.88, 'ibm vs cobol' => 0.75, 'ibm vs cobol ii' => 0.88, 'ices' => 1.13, 'icon' => 1.00, 'ideal' => 1.54, 'idms' => 2.00, 'ief' => 5.71, 'ief/cool:gen' => 2.58, 'iew' => 5.71, 'ifps/plus' => 2.50, 'imprs' => 2.00, 'informix' => 2.58, 'ingres' => 2.00, 'inquire' => 6.15, 'insight2' => 1.63, 'install/1' => 5.00, 'intellect' => 1.51, 'interlisp' => 1.38, 'interpreted basic' => 0.75, 'interpreted c' => 0.63, 'iqlisp' => 1.38, 'iqrp' => 6.15, 'j2ee' => 1.60, 'janus' => 1.13, 'Java' => 1.36, 'Javascript' => 1.48, 'JSP' => 1.48, 'JCL' => 1.67, 'joss' => 0.75, 'jovial' => 0.75, 'jsp' => 1.36, 'kappa' => 2.00, 'kbms' => 1.63, 'kcl' => 1.25, 'kee' => 1.63, 'keyplus' => 2.00, 'kl' => 1.25, 'klo' => 1.25, 'knowol' => 1.63, 'krl' => 1.38, 'Korn Shell' => 3.81, 'ladder logic' => 2.22, 'lambit/l' => 1.25, 'lattice c' => 0.63, 'liana' => 0.63, 'lilith' => 1.13, 'linc ii' => 5.71, 'Lisp' => 1.25, 'LiveLink OScript' => 3.5 , 'loglisp' => 1.38, 'loops' => 3.81, 'lotus 123 dos' => 13.33, 'lotus macros' => 0.75, 'lotus notes' => 3.64, 'lucid 3d' => 13.33, 'lyric' => 1.51, 'm4' => 1.00, 'm' => 5.00, 'macforth' => 1.25, 'mach1' => 2.00, 'machine language' => 0.13, 'maestro' => 5.00, 'magec' => 5.00, 'magik' => 3.81, 'Lake' => 3.81, 'make' => 2.50, 'mantis' => 2.96, 'mapper' => 0.99, 'mark iv' => 2.00, 'mark v' => 2.22, 'mathcad' => 16.00, 'mdl' => 2.22, 'mentor' => 1.51, 'mesa' => 0.75, 'microfocus cobol' => 1.00, 'microforth' => 1.25, 'microsoft c' => 0.63, 'microstep' => 4.00, 'miranda' => 2.00, 'model 204' => 2.11, 'modula 2' => 1.00, 'mosaic' => 13.33, # 'ms c ++ v. 7' => 1.51, 'ms compiled basic' => 0.88, 'msl' => 1.25, 'mulisp' => 1.25, 'MUMPS' => 4.21, 'Nastran' => 1.13, 'natural' => 1.54, 'natural 1' => 1.51, 'natural 2' => 1.74, 'natural construct' => 3.20, 'natural language' => 0.03, 'netron/cap' => 4.21, 'nexpert' => 1.63, 'nial' => 1.63, 'nomad2' => 2.00, 'nonprocedural default' => 2.22, 'notes vip' => 2.22, 'nroff' => 1.51, 'object assembler' => 1.25, 'object lisp' => 2.76, 'object logo' => 2.76, 'object pascal' => 2.76, 'object star' => 5.00, 'Objective C' => 2.96, 'objectoriented default' => 2.76, 'objectview' => 3.20, 'ogl' => 1.00, 'omnis 7' => 2.00, 'oodl' => 2.76, 'ops' => 1.74, 'ops5' => 1.38, 'oracle' => 2.76, 'Oracle Reports' => 2.76, 'Oracle Forms' => 2.67, 'Oracle Developer/2000' => 3.48, 'oscar' => 0.75, 'pacbase' => 1.67, 'pace' => 2.00, 'paradox/pal' => 2.22, 'Pascal' => 0.88, 'Patran Command Language' => 2.50, 'pc focus' => 2.22, 'pdl millenium' => 3.81, 'pdp11 ade' => 1.51, 'peoplesoft' => 2.50, 'Perl' => 4.00, 'persistance object builder' => 3.81, 'pilot' => 1.51, 'pl/1' => 1.38, 'pl/m' => 1.13, 'pl/s' => 0.88, 'pl/sql' => 2.58, 'planit' => 1.51, 'planner' => 1.25, 'planperfect 1' => 11.43, 'plato' => 1.51, 'polyforth' => 1.25, 'pop' => 1.38, 'poplog' => 1.38, 'power basic' => 1.63, 'powerbuilder' => 3.33, 'powerhouse' => 5.71, 'ppl (plus)' => 2.00, 'problemoriented default' => 1.13, 'proc' => 2.96, 'procedural default' => 0.75, 'professional pascal' => 0.88, 'program generator default' => 5.00, 'progress v4' => 2.22, 'proiv' => 1.38, 'prolog' => 1.25, 'prose' => 0.75, 'proteus' => 0.75, 'qbasic' => 1.38, 'qbe' => 6.15, 'qmf' => 5.33, 'qnial' => 1.63, 'quattro' => 13.33, 'quattro pro' => 13.33, 'query default' => 6.15, 'quick basic 1' => 1.25, 'quick basic 2' => 1.31, 'quick basic 3' => 1.38, 'quick c' => 0.63, 'quickbuild' => 2.86, 'quiz' => 5.33, 'rally' => 2.00, 'ramis ii' => 2.00, 'rapidgen' => 2.86, 'ratfor' => 0.88, 'rdb' => 2.00, 'realia' => 1.74, 'realizer 1.0' => 2.00, 'realizer 2.0' => 2.22, 'relate/3000' => 2.00, 'reuse default' => 16.00, 'Rexx' => 1.19, 'Rexx (mvs)' => 1.00, 'Rexx (os/2)' => 1.74, 'rm basic' => 0.88, 'rm cobol' => 0.75, 'rm fortran' => 0.75, 'rpg i' => 1.00, 'rpg ii' => 1.63, 'rpg iii' => 1.63, 'rtexpert 1.4' => 1.38, 'sabretalk' => 0.90, 'sail' => 0.75, 'sapiens' => 5.00, 'sas' => 1.95, 'savvy' => 6.15, 'sbasic' => 0.88, 'sceptre' => 1.13, 'scheme' => 1.51, 'screen painter default' => 13.33, 'sequal' => 6.67, 'Bourne Shell' => 3.81, 'Bourne Again Shell' => 3.81, 'ksh' => 3.81, 'C Shell' => 3.81, 'siebel tools ' => 6.15, 'simplan' => 2.22, 'simscript' => 1.74, 'simula' => 1.74, 'simula 67' => 1.74, 'simulation default' => 1.74, 'SKILL' => 2.00, 'SKILL++' => 2.00, 'slogan' => 0.98, 'smalltalk' => 2.50, 'smalltalk 286' => 3.81, 'smalltalk 80' => 3.81, 'smalltalk/v' => 3.81, 'snap' => 1.00, 'snobol24' => 0.63, 'softscreen' => 5.71, 'Softbridge Basic' => 2.76, 'solo' => 1.38, 'speakeasy' => 2.22, 'spinnaker ppl' => 2.22, 'splus' => 2.50, 'spreadsheet default' => 13.33, 'sps' => 0.25, 'spss' => 2.50, 'SQL' => 2.29, 'sqlwindows' => 6.67, 'statistical default' => 2.50, 'strategem' => 2.22, 'stress' => 1.13, 'strongly typed default' => 0.88, 'style' => 1.74, 'superbase 1.3' => 2.22, 'surpass' => 13.33, 'sybase' => 2.00, 'symantec c++' => 2.76, 'symbolang' => 1.25, 'synchroworks' => 4.44, 'synon/2e' => 4.21, 'systemw' => 2.22, 'tandem access language' => 0.88, 'Tcl/Tk' => 4.00, 'Teamcenter def' => 1.00, 'Teamcenter met' => 1.00, 'Teamcenter mth' => 1.00, 'telon' => 5.00, 'tessaract' => 2.00, 'the twin' => 13.33, 'themis' => 6.15, 'tiief' => 5.71, 'topspeed c++' => 2.76, 'transform' => 5.33, 'translisp plus' => 1.43, 'treet' => 1.25, 'treetran' => 1.25, 'trs80 basic' => 0.63, 'true basic' => 1.25, 'turbo c' => 0.63, # 'turbo c++' => 1.51, 'turbo expert' => 1.63, 'turbo pascal >5' => 1.63, 'turbo pascal 14' => 1.00, 'turbo pascal 45' => 1.13, 'turbo prolog' => 1.00, 'turing' => 1.00, 'tutor' => 1.51, 'twaice' => 1.63, 'ucsd pascal' => 0.88, 'ufo/ims' => 2.22, 'uhelp' => 2.50, 'uniface' => 5.00, # 'unix shell scripts' => 3.81, 'vax acms' => 1.38, 'vax ade' => 2.00, 'vbscript' => 2.35, 'vectran' => 0.75, 'vhdl ' => 4.21, 'vim script' => 3.00, 'visible c' => 1.63, 'visible cobol' => 2.00, 'visicalc 1' => 8.89, 'visual 4.0' => 2.76, 'visual basic' => 1.90, 'visual basic 1' => 1.74, 'visual basic 2' => 1.86, 'visual basic 3' => 2.00, 'visual basic 4' => 2.22, 'visual basic 5' => 2.76, 'Visual Basic' => 2.76, 'visual basic dos' => 2.00, 'visual c++' => 2.35, 'visual cobol' => 4.00, 'visual objects' => 5.00, 'visualage' => 3.81, 'visualgen' => 4.44, 'vpf' => 0.84, 'vsrexx' => 2.50, 'vulcan' => 1.25, 'vz programmer' => 2.22, 'warp x' => 2.00, 'watcom c' => 0.63, 'watcom c/386' => 0.63, 'waterloo c' => 0.63, 'waterloo pascal' => 0.88, 'watfiv' => 0.94, 'watfor' => 0.88, 'web scripts' => 5.33, 'whip' => 0.88, 'wizard' => 2.86, 'xlisp' => 1.25, 'yacc' => 1.51, 'yacc++' => 1.51, 'YAML' => 0.90, 'zbasic' => 0.88, 'zim' => 4.21, 'zlisp' => 1.25, 'Expect' => 2.00, 'C/C++ Header' => 1.00, 'inc' => 1.00, 'lex' => 1.00, 'MATLAB' => 4.00, 'IDL' => 3.80, 'Octave' => 4.00, 'ML' => 3.00, 'Modula3' => 2.00, 'PHP' => 3.50, 'Python' => 4.20, 'Ruby' => 4.20, 'sed' => 4.00, 'Lua' => 4.00, ); # 1}}} %{$rh_Known_Binary_Archives} = ( # {{{1 '.tar' => 1 , '.tar.Z' => 1 , '.tar.gz' => 1 , '.tar.bz2' => 1 , '.zip'