Patch for Text::Autoformat: lib/Text/Autoformat.pm, $VERSION 1.12 -> 1.13beta.

What the hunks below change:
  * $ignore_headers is rewritten as a multi-line /mx regex, and
    ignore_headers() now tests its argument for truth instead of == 1.
  * $quotechunk accepts "[a-z]\w*" (optionally) before the ">" run.
  * New 'tabspace' option (default 8), assigned to $tabstop right after
    "use Text::Tabs".
  * New 'mail' option: it turns on 'all'; when no 'ignore' option is given
    it selects ignore_headers as the ignore filter (otherwise the filter is
    sub{0}); and the /^--$/ terminator only stops processing when 'mail'
    is set.
  * $abbrev drops the literal U[.]S[.] entry in favour of two general
    capitalised-abbreviation patterns.
  * "use strict" is added to packages Hang and NullHang.
  * Package Hang gains an elsif branch that sets @vals = () when the text
    matches m#\([^\s)]+\s#.
  * The POD gains a paragraph describing the 'mail' option.

NOTE(review): this file had been collapsed onto two physical lines.  Line
breaks were restored so that every hunk agrees with the line counts in its
"@@" header.  Leading indentation and the exact position of blank context
lines could not be recovered and are reconstructed; apply with "patch -l"
and confirm against the upstream file.  The POD lines are kept exactly as
found (their C<...>/I<...> markup appears to have been stripped) - TODO
confirm against upstream.

--- lib/Text/Autoformat.pm.orig	2003-05-27 18:34:38.000000000 -0500
+++ lib/Text/Autoformat.pm	2005-02-09 11:35:36.487236034 -0600
@@ -2,7 +2,7 @@
 use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
 use Carp;
 use 5.005;
-$VERSION = '1.12';
+$VERSION = '1.13beta';
 
 require Exporter;
 
@@ -74,15 +74,23 @@
 	return "";
 }
 
-my $ignore_headers = qr/\A(From\b.*$)?([^:]+:.*$([ \t].*$)*)+\s*\Z/m;
+my $ignore_headers = qr/
+	\A
+	(?: From \b .* $)?
+	(?: [^:\n]+ : .* \n
+		(?: [ \t] .* \n)*
+	)+
+	\s*
+	\Z
+	/mx;
 my $ignore_indent = qr/^[^\S\n].*(\n[^\S\n].*)*$/;
 
-sub ignore_headers { $_[0]==1 && /$ignore_headers/ }
+sub ignore_headers { $_[0] && /$ignore_headers/ }
 
 # BITS OF A TEXT LINE
 
 my $quotechar = qq{[!#%=|:]};
-my $quotechunk = qq{(?:$quotechar(?![a-z])|[a-z]*>+)};
+my $quotechunk = qq{(?:$quotechar(?![a-z])|(?:[a-z]\\w*)?>+)};
 my $quoter = qq{(?:(?i)(?:$quotechunk(?:[ \\t]*$quotechunk)*))};
 my $separator = q/(?:[-_]{2,}|[=#*]{3,}|[+~]{4,})/;
 
@@ -120,9 +128,11 @@
 	$args{break} = break_at('-') unless exists $args{break};
 	$args{impfill} = ! exists $args{fill};
 	$args{expfill} = $args{fill};
+	$args{tabspace} = 8 unless exists $args{tabspace};
 	$args{renumber} = 1 unless exists $args{renumber};
 	$args{autocentre} = 1 unless exists $args{autocentre};
 	$args{_centred} = 1 if $args{justify} =~ /cent(er(ed)?|red?)/;
+	$args{all} ||= $args{mail};
 
 	# SPECIAL IGNORANCE...
 	if ($args{ignore}) {
@@ -138,13 +148,16 @@
 		croak "Expected suboutine reference as value for -ignore option"
 			if ref $args{ignore} ne 'CODE';
 	}
-	else {
+	elsif ($args{mail}) {
 		$args{ignore} = \&ignore_headers;
 	}
+	else {
+		$args{ignore} = sub{0};
+	}
 
 	# DETABIFY
 	my @rawlines = split /\n/, $text;
-	use Text::Tabs;
+	use Text::Tabs; $tabstop = $args{tabspace};
 	@rawlines = expand(@rawlines);
 
 	# PARSE EACH LINE
@@ -255,11 +268,13 @@
 
 	# SELECT PARAS TO HANDLE
 	my $remainder = "";
-	if ($args{all}) { # STOP AT MAIL TERMINATOR
+	if ($args{all}) { # STOP AT MAIL TERMINATOR IF $args{mail}
+		my $lastignored = 1;
 		for my $index (0..$#paras) {
-			local $_ = $paras[$index]{raw};
-			$paras[$index]{ignore} = $args{ignore}($index+1);
-			next unless /^--$/;
+			local $_ = $paras[$index]{raw} . "\n";
+			$lastignored &&=
+				$paras[$index]{ignore} = $args{ignore}($lastignored);
+			next unless $args{mail} && /^--$/;
 			$remainder = join "\n", map { $_->{raw} } splice @paras, $index;
 			$remainder .= "\n" unless $remainder =~ /\n\z/;
 			last;
@@ -513,7 +528,9 @@
 }
 
 my $abbrev = join '|', qw{
-	etc[.]	pp[.]	ph[.]?d[.]	U[.]S[.]
+	etc[.]	pp[.]	ph[.]?d[.]
+	(?:[A-Z][A-Za-z]+[.])+
+	(?:[A-Z][.])(?:[A-Z][.])+
 };
 
 my $gen_abbrev = join '|', $abbrev, qw{
@@ -604,6 +621,7 @@
 }
 
 package Hang;
+use strict;
 
 # ROMAN NUMERALS
 
@@ -658,6 +676,9 @@
 		elsif ($_[1] =~ s#\A($hang)##) {
 			@vals = { type => 'bul', val => $1 }
 		}
+		elsif ($_[1] =~ m#\([^\s)]+\s#) {
+			@vals = ();
+		}
 		else {
 			local $^W;
 			my $cut;
@@ -801,6 +822,7 @@
 sub empty { 0 }
 
 package NullHang;
+use strict;
 
 sub new { bless {}, $_[0] }
 sub stringify { "" }
@@ -1021,6 +1043,14 @@
 C will ignore any paragraph in which I line begins with a
 whitespace.
 
+One other special case of ignorance is ignoring mail headers and signature.
+This option is specified using the C argument:
+
+	$tidied_mesg = autoformat($messy_mesg, {mail=>1});
+
+Note that the C option automatically implies C.
+
+
 =head2 Bulleting and (re-)numbering
 
 Often plaintext will include lists that are either: