Navalla Supporting the evolution of a new free world.

 

Perl Formatter

DESCRIPTION

Adds syntax highlighting to Perl source code.

SYNOPSIS

Perl

require( 'perl_formatter.pl' );
# load perl source code
# ...
print "<pre>", perl_format( $perl_code ), "</pre>\n";

Perl-CGI

<?perl
App::require_pl( 'perl_formatter.pl' );
# load perl source code
# ...
print "<pre>", perl_format( $perl_code ), "</pre>\n";

# format perl-cgi code
print "<pre>", perl_format( $plc_code, 1, 1 ), "</pre>\n";
?>

COPYRIGHT

The library is free software. You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the Perl README file.

SOURCE CODE

# *******************
# * file: perl_formatter.pl
# * license: GPL or Artistic License
# * copyright: Christian Mueller <chrmue[at]cpan.org>
# ******************************************************

#use strict;
our(
    $Formatter, $ReqFormatter, $FmtHTML, $FmtPerlCGI,
    $Perl_Keywords, $Perl_Functions, $Perl_Methods, $Perl_Operators,
    $Perl_Special, $PerlCGI_Functions
);

our %Perl_ECOB = ( '[' => ']', '(' => ')', '<' => '>', '{' => '}' );

sub perl_format {
    my( $code, $pre, $perlcgi ) = @_;
    my(
        $out, $str, $len, $space, $tab, $lf, $sub, $i, $m, $p1, $p2, $l,
        $type
    );
    $pre = 1 unless defined $pre;
    $out = '';
    $code =~ s/\r//gs;
    $len = length( $code );
    if( $pre ) {
        $space = ' ';
        $lf = "\n";
        $tab = "&nbsp;&nbsp;&nbsp;&nbsp;";
    }
    else {
        $space = '&nbsp;';
        $lf = "<br />\n";
        $tab = "&nbsp;&nbsp;&nbsp;&nbsp;";
    }
    $Formatter = $sub = $perlcgi ? 'PerlCGI' : 'Perl';
    $FmtPerlCGI = $perlcgi;
    for( $i = 0; $i < $len; ) {
        while( 1 ) {
            $m = substr( $code, $i, 1 );
            if( $m eq ' ' ) {
                $out .= $space;
            }
            elsif( $m eq "\n" ) {
                $out .= $lf;
            }
            elsif( $m eq "\t" ) {
                $out .= $tab;
            }
            else {
                last;
            }
            $i ++;
        }
        if( $Formatter eq 'Perl' ) {
            $l = perl_get_token( $code, $i, $len, $type );
        }
        elsif( $Formatter eq 'PerlCGI' ) {
            $p1 = index( $code, '<*', $i );
            $p2 = index( $code, '<?', $i );
            $p1 = $p2 if $p1 < 0 || ($p2 >= 0 && $p2 < $p1);
            $p2 = -1;
            $m = substr( $code, $i );
            if( $m =~ /\<script\s+language\=[\"\']*perl[\"\']*\s*\>/i ) {
                $p2 = $i + $-[0];
            }
            if( $p1 >= 0 && $p2 >= 0 && $p2 < $p1 ) {
                $l = $p2 - $i;
                if( $l ) {
                    $type = 'PLAIN';
                }
                else {
                    $type = 'CGI';
                    $Formatter = 'Perl';
                    $l = $+[0] - $-[0];
                }
            }
            elsif( $p1 >= 0 ) {
                $l = $p1 - $i;
                if( $l ) {
                    $type = 'PLAIN';
                }
                else {
                    $type = 'CGI';
                    $Formatter = 'Perl';
                    $l = lc( substr( $code, $i + 2, 4 ) ) eq 'perl' ? 6 : 2;
                }
            }
            else {
                $l = $len - $i;
            }
        }
        $l = 1 if $l <= 0;
        $sub = $Formatter if $sub ne $Formatter;
        $str = substr( $code, $i, $l );
        $str =~ s/\&/&amp;/gs;
        $str =~ s/\</&lt;/gs;
        $str =~ s/\>/&gt;/gs;
        $str =~ s/\t/$tab/gs;
        $out .= '<span class="' . $sub . '_' . $type . '">' . $str . '</span>';
        if( $ReqFormatter ) {
            $Formatter = $ReqFormatter;
            $ReqFormatter = undef;
        }
        $i += $l;
        while( 1 ) {
            $m = substr( $code, $i, 1 );
            if( $m eq ' ' ) {
                $out .= $space;
            }
            elsif( $m eq "\n" ) {
                $out .= $lf;
            }
            elsif( $m eq "\t" ) {
                $out .= $tab;
            }
            else {
                last;
            }
            $i ++;
        }
    }
    return $out;
}

sub perl_get_token {
    my( $str, $offset, $len, $type ) = @_;
    my( $i, $ch, $l, $br );
    $str = substr( $str, $offset );
    $ch = substr( $str, 0, 1 );
    if( $ch eq '#' ) {
        $_[3] = 'CMT';
        $ch = substr( $str, 1, 1 );
        if( $FmtPerlCGI && ($ch = $Perl_ECOB{$ch}) ) {
            # perl6 embedded comment
            $br = $ch;
            for( $i = 2; ; $i ++ ) {
                $ch = substr( $str, $i, 1 );
                if( ($ch = $Perl_ECOB{$ch}) ) {
                    $br = $ch . $br;
                }
                else {
                    last;
                }
            }
            $i = index( $str, $br, $i );
            return $i >= 0 ? $i + length( $br ) : $len - $offset;
        }
        else {
            $i = index( $str, "\n", 1 );
            return $i >= 0 ? $i : $len - $offset;
        }
    }
    elsif( $ch eq '=' ) {
        if( substr( $str, 1, 1 ) eq '#' ) {
            # perl-cgi embedded comment
            $_[3] = 'CMT';
            $i = index( $str, '#=', 2 );
            return $i >= 0 ? $i + 2 : $len - $offset;
        }
        $_[3] = 'OP';
        return 1;
    }
    elsif( $ch eq '(' || $ch eq ')' ) {
        $_[3] = 'PAR';
        return 1;
    }
    elsif( $ch eq '{' || $ch eq '}' ) {
        $_[3] = 'CBR';
        return 1;
    }
    elsif( $ch eq '[' || $ch eq ']' ) {
        $_[3] = 'SBR';
        return 1;
    }
    elsif( $ch eq ',' ) {
        $_[3] = 'COMMA';
        return 1;
    }
    elsif( $ch eq ';' ) {
        $_[3] = 'SEMI';
        return 1;
    }
    elsif( $ch eq '-' ) {
        if( index( ' A B C M O R S T W X b c d e f g k l o p r s t u w x z ',
            ' ' . substr( $str, 1, 1 ) . ' ' ) >= 0 )
        {
            $_[3] = 'KWD';
            return 2;
        }
        $_[3] = 'OP';
        return 1;
    }
    elsif( $ch eq '*' || $ch eq '?' ) {
        if( substr( $str, 1, 1 ) eq '>' ) {
            $ReqFormatter = $FmtHTML ? 'HTML' : 'PerlCGI';
            $_[3] = 'CGI';
            return 2;
        }
        $_[3] = 'OP';
        return 1;
    }
    elsif( $ch eq '<' ) {
        if( $str =~ m/^\<\s*\/script\s*\>/i ) {
            if( $FmtHTML ) {
                $Formatter = 'HTML';
                $_[3] = 'TAG';
            }
            else {
                $ReqFormatter = 'PerlCGI';
                $_[3] = 'CGI';
            }
            return $+[0];
        }
        $_[3] = 'OP';
        return 1;
    }
    elsif( $ch eq '+' || $ch eq '/' || $ch eq '!' || $ch eq '&' ||
        $ch eq '|' || $ch eq ':' || $ch eq '>' || $ch eq '^' ||
        $ch eq '~' || $ch eq '.' || $ch eq '-'
    ) {
        $_[3] = 'OP';
        return 1;
    }
    elsif( $ch eq '\\' ) {
        $_[3] = 'ESC';
        $ch = substr( $str, 1, 1 );
        return $ch eq '"' || $ch eq '\'' || $ch eq '#' ? 2 : 1;
    }
    elsif( $ch eq '$' || $ch eq '%' || $ch eq '@' ) {
        $_[3] = 'VAR';
        if( $str =~ m/^[\$\%\@][\w\#\%\@\$\:\^]+/ ) {
            return $+[0];
        }
        return 1;
    }
    elsif( $ch eq '"' || $ch eq '\'' ) {
        $_[3] = 'STR';
        $i = 0;
        while( 1 ) {
            # not perfect but fast
            $l = index( $str, "\n", $i + 1 );
            $i = index( $str, $ch, $i + 1 );
            return $l if $l >= 0 && $l < $i;
            return $len - $offset if $i < 0;
            if( substr( $str, $i - 1, 1 ) ne '\\' ||
                substr( $str, $i - 2, 1 ) eq '\\'
            ) { 
                return $i + 1;
            }
        }
    }
    elsif( $ch ge '0' && $ch le '9' ) {
        if( $ch eq '0' && substr( $str, 1, 1 ) eq 'x' ) {
            for( $i = 2; ; $i ++ ) {
                $ch = substr( $str, $i, 1 );
                if( ($ch lt '0' || $ch gt '9') &&
                    ($ch lt 'a' || $ch gt 'f') &&
                    ($ch lt 'A' || $ch gt 'F')
                ) {
                    last;
                }
            }
        }
        else {
            for( $i = 1; ; $i ++ ) {
                $ch = substr( $str, $i, 1 );
                last if $ch lt '0' || $ch gt '9' || $ch ne '.';
            }
        }
        $_[3] = 'NUM';
        return $i;
    }
    elsif( $str =~ m/^[\w\_]+/ ) {
        $ch = ' ' . $& . ' ';
        if( index( $Perl_Keywords, $ch ) >= 0 ) {
            $_[3] = 'KWD';
        }
        elsif( index( $Perl_Functions, $ch ) >= 0 ) {
            $_[3] = 'FNC';
        }
        elsif( index( $Perl_Methods, $ch ) >= 0 ) {
            $_[3] = 'METH';
        }
        elsif( index( $Perl_Operators, $ch ) >= 0 ) {
            $_[3] = 'OP';
        }
        elsif( index( $Perl_Special, $ch ) >= 0 ) {
            $_[3] = 'SPC';
        }
        elsif( index( $PerlCGI_Functions, $ch ) >= 0 ) {
            $_[3] = 'PCF';
        }
        else {
            $_[3] = 'ID';
        }
        return $+[0];
    }
    $_[3] = 'ID';
    return 1;
}

$Perl_Keywords = q/
continue
do
else elsif exit
for foreach
goto
if
last local
my
next no
our
/ . q/package
redo require return
sub
unless until use
while
/;

$Perl_Functions = q/
accept alarm atan2
bind binmode bless
caller chdir chmod chomp chop chown chr chroot close closedir connect cos
crypt
dbmclose dbmopen defined delete die dump
each endgrent endhostent endnetent endprotoent endpwent endservent eof eval
exec exists exp
fcntl fileno flock fork format formline
getc getgrent getgrgid getgrname gethostbyaddr gethostbyname gethostent
getlogin getnetbyaddr getnetbyname
getnetent getpeername getpgrp getppid getpriority getprotobyname
getprotobynumber getprotoent getpwent
getpwnam getpwuid getservbyname getservbyport getservent getsockname
getsockopt glob gmtime grep
hex
index int ioctl
join
keys kill
lc lcfirst length link listen localtime log lstat
map mkdir msgctl msgget msgrcv msgsnd
oct open opendir ord
pack pipe pop pos print printf push
quotemeta
rand read readdir readline readlink recv ref rename reset reverse rewinddir
rindex rmdir
scalar seek seekdir select semctl semgett semop send setgrent sethostent
setnetent setpgrp setpriority
setprotoent setpwent setservent setsockopt shift shmctl shmget shmread
shmwrite shutdown sin sleep socket
socketpair sort splice split sprintf sqrt srand stat study substr symlink
syscall sysopen sysread system syswrite
tell telldir tie tied time times truncate
uc ucfirst umask undef unlink unpack unshift untie utime
values vec
wait waitpid wantarray warn write
/;

$Perl_Methods = q/
AUTOLOAD
BEGIN
CHECK CLEAR CLOSE CORE
DELETE DESTROY
END EXISTS EXTEND
FETCH FETCHSIZE FIRSTKEY
GETC
INIT
NEXTKEY
POP PRINT PRINTF PUSH
READ READLINE
SHIFT SPLICE STORE STORESIZE SUPER
TIEARRAY TIEHANDLE TIEHASH TIESCALAR
UNIVERSAL UNSHIFT UNTIE
WRITE
/ . q/__DATA__ __END__ __FILE__ __LINE__ __PACKAGE__
attributes attrs autouse
base blib bytes
charnames constant
diagnostics
fields filetest
integer
less lib locale
ops overload
perllocal
re
sigtrap strict subs
utf8
vars
warnings
/;

$Perl_Operators = q/
and
eq
ge gt
le lt
m
ne not
or
qq qw qx q
s
tr
x xor
y
/;

$Perl_Special = q/
STDERR STDIN STDOUT
create
detach
list lock
self shared
this threads
yield
/;

$PerlCGI_Functions = q/
App
cfg_get
decode_uri_component decode_uri dump_var
encode_uri_component encode_uri error_reporting
file_exists file_get file_put file_stat file_touch fullpath
gateway gmmktime gmstrftime
header
include include_once is_array is_double is_float is_hash is_int is_integer
is_long is_numeric is_object is_string is_unicode is_unsigned
is_uploaded_file
levenshtein
microtime mktime move_uploaded_file
number_format
print_var
register_cleanup require require_once require_pl round
session_cache_expire session_cache_limiter session_destroy session_id
session_name session_regenerate_id session_save_path session_set_handler
session_start set_locale set_timezone set_user_locale setcookie show_var
SID str_ireplace str_xreplace strfmon strftime
typeof
/;

$Perl_Keywords =~ tr/\n/ /;
$Perl_Functions =~ tr/\n/ /;
$Perl_Methods =~ tr/\n/ /;
$Perl_Operators =~ tr/\n/ /;
$Perl_Special =~ tr/\n/ /;
$PerlCGI_Functions =~ tr/\n/ /;

1;

STYLESHEET FORMAT

The webpage uses the stylsheet settings below to highlight Perl code.

span.Perl_KWD, span.Perl_CBR, span.Perl_SEMI {
    color: blue;
}
span.Perl_FNC, span.Perl_NUM {
    color: red;
}
span.Perl_STR {
    color: gray;
}
span.Perl_CMT {
    color: teal;
}
span.Perl_VAR {
    color: #CC6600;
}
span.Perl_OP, span.Perl_PAR, span.Perl_SBR, span.Perl_COMMA, span.Perl_ESC {
    color: green;
}
span.Perl_METH {
    color: orange;
}
span.Perl_SPC {
    color: magenta;
}
span.Perl_CGI {
    color: purple;
}
span.Perl_PCF {
    color: #804040;
}
 
UNITE FOR CHILDREN - UNITE AGAINST AIDS
 
Generated with Perl 5.10.1 and Perl-CGI 1.0 over FastCGI within 6.22ms in memory safe mode.