You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
140 lines
3.5 KiB
140 lines
3.5 KiB
20 years ago
|
--- lib/Text/Autoformat.pm.orig 2003-05-27 18:34:38.000000000 -0500
|
||
|
+++ lib/Text/Autoformat.pm 2005-02-09 11:35:36.487236034 -0600
|
||
|
@@ -2,7 +2,7 @@
|
||
|
|
||
|
use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); use Carp;
|
||
|
use 5.005;
|
||
|
-$VERSION = '1.12';
|
||
|
+$VERSION = '1.13beta';
|
||
|
|
||
|
require Exporter;
|
||
|
|
||
|
@@ -74,15 +74,23 @@
|
||
|
return "";
|
||
|
}
|
||
|
|
||
|
-my $ignore_headers = qr/\A(From\b.*$)?([^:]+:.*$([ \t].*$)*)+\s*\Z/m;
|
||
|
+my $ignore_headers = qr/
|
||
|
+ \A
|
||
|
+ (?: From \b .* $)?
|
||
|
+ (?: [^:\n]+ : .* \n
|
||
|
+ (?: [ \t] .* \n)*
|
||
|
+ )+
|
||
|
+ \s*
|
||
|
+ \Z
|
||
|
+ /mx;
|
||
|
my $ignore_indent = qr/^[^\S\n].*(\n[^\S\n].*)*$/;
|
||
|
|
||
|
-sub ignore_headers { $_[0]==1 && /$ignore_headers/ }
|
||
|
+sub ignore_headers { $_[0] && /$ignore_headers/ }
|
||
|
|
||
|
# BITS OF A TEXT LINE
|
||
|
|
||
|
my $quotechar = qq{[!#%=|:]};
|
||
|
-my $quotechunk = qq{(?:$quotechar(?![a-z])|[a-z]*>+)};
|
||
|
+my $quotechunk = qq{(?:$quotechar(?![a-z])|(?:[a-z]\\w*)?>+)};
|
||
|
my $quoter = qq{(?:(?i)(?:$quotechunk(?:[ \\t]*$quotechunk)*))};
|
||
|
|
||
|
my $separator = q/(?:[-_]{2,}|[=#*]{3,}|[+~]{4,})/;
|
||
|
@@ -120,9 +128,11 @@
|
||
|
$args{break} = break_at('-') unless exists $args{break};
|
||
|
$args{impfill} = ! exists $args{fill};
|
||
|
$args{expfill} = $args{fill};
|
||
|
+ $args{tabspace} = 8 unless exists $args{tabspace};
|
||
|
$args{renumber} = 1 unless exists $args{renumber};
|
||
|
$args{autocentre} = 1 unless exists $args{autocentre};
|
||
|
$args{_centred} = 1 if $args{justify} =~ /cent(er(ed)?|red?)/;
|
||
|
+ $args{all} ||= $args{mail};
|
||
|
|
||
|
# SPECIAL IGNORANCE...
|
||
|
if ($args{ignore}) {
|
||
|
@@ -138,13 +148,16 @@
|
||
|
croak "Expected suboutine reference as value for -ignore option"
|
||
|
if ref $args{ignore} ne 'CODE';
|
||
|
}
|
||
|
- else {
|
||
|
+ elsif ($args{mail}) {
|
||
|
$args{ignore} = \&ignore_headers;
|
||
|
}
|
||
|
+ else {
|
||
|
+ $args{ignore} = sub{0};
|
||
|
+ }
|
||
|
|
||
|
# DETABIFY
|
||
|
my @rawlines = split /\n/, $text;
|
||
|
- use Text::Tabs;
|
||
|
+ use Text::Tabs; $tabstop = $args{tabspace};
|
||
|
@rawlines = expand(@rawlines);
|
||
|
|
||
|
# PARSE EACH LINE
|
||
|
@@ -255,11 +268,13 @@
|
||
|
# SELECT PARAS TO HANDLE
|
||
|
|
||
|
my $remainder = "";
|
||
|
- if ($args{all}) { # STOP AT MAIL TERMINATOR
|
||
|
+ if ($args{all}) { # STOP AT MAIL TERMINATOR IF $args{mail}
|
||
|
+ my $lastignored = 1;
|
||
|
for my $index (0..$#paras) {
|
||
|
- local $_ = $paras[$index]{raw};
|
||
|
- $paras[$index]{ignore} = $args{ignore}($index+1);
|
||
|
- next unless /^--$/;
|
||
|
+ local $_ = $paras[$index]{raw} . "\n";
|
||
|
+ $lastignored &&=
|
||
|
+ $paras[$index]{ignore} = $args{ignore}($lastignored);
|
||
|
+ next unless $args{mail} && /^--$/;
|
||
|
$remainder = join "\n", map { $_->{raw} } splice @paras, $index;
|
||
|
$remainder .= "\n" unless $remainder =~ /\n\z/;
|
||
|
last;
|
||
|
@@ -513,7 +528,9 @@
|
||
|
}
|
||
|
|
||
|
my $abbrev = join '|', qw{
|
||
|
- etc[.] pp[.] ph[.]?d[.] U[.]S[.]
|
||
|
+ etc[.] pp[.] ph[.]?d[.]
|
||
|
+ (?:[A-Z][A-Za-z]+[.])+
|
||
|
+ (?:[A-Z][.])(?:[A-Z][.])+
|
||
|
};
|
||
|
|
||
|
my $gen_abbrev = join '|', $abbrev, qw{
|
||
|
@@ -604,6 +621,7 @@
|
||
|
}
|
||
|
|
||
|
package Hang;
|
||
|
+use strict;
|
||
|
|
||
|
# ROMAN NUMERALS
|
||
|
|
||
|
@@ -658,6 +676,9 @@
|
||
|
elsif ($_[1] =~ s#\A($hang)##) {
|
||
|
@vals = { type => 'bul', val => $1 }
|
||
|
}
|
||
|
+ elsif ($_[1] =~ m#\([^\s)]+\s#) {
|
||
|
+ @vals = ();
|
||
|
+ }
|
||
|
else {
|
||
|
local $^W;
|
||
|
my $cut;
|
||
|
@@ -801,6 +822,7 @@
|
||
|
sub empty { 0 }
|
||
|
|
||
|
package NullHang;
|
||
|
+use strict;
|
||
|
|
||
|
sub new { bless {}, $_[0] }
|
||
|
sub stringify { "" }
|
||
|
@@ -1021,6 +1043,14 @@
|
||
|
C<autoformat> will ignore any paragraph in which I<every> line begins with a
|
||
|
whitespace.
|
||
|
|
||
|
+Another special case of ignorance is skipping the headers and signature of a mail message.
|
||
|
+It is enabled by passing a true value for the C<mail> argument:
|
||
|
+
|
||
|
+ $tidied_mesg = autoformat($messy_mesg, {mail=>1});
|
||
|
+
|
||
|
+Note that the C<mail> option automatically implies C<all>.
|
||
|
+
|
||
|
+
|
||
|
=head2 Bulleting and (re-)numbering
|
||
|
|
||
|
Often plaintext will include lists that are either:
|