���ѧۧݧ�ӧ�� �ާ֧ߧ֧էا֧� - ���֧էѧܧ�ڧ��ӧѧ�� - /home/zhaagvlk/public_html/wp-includes/Requests/library/block-bindings/IO.tar
���ѧ٧ѧ�
Lines.pm 0000444 00000010112 15160667122 0006153 0 ustar 00
package IO::Lines;

use strict;
use Carp;
use IO::ScalarArray;

# The package version, both in 1.23 style *and* usable by MakeMaker:
our $VERSION = '2.113';

# Inheritance:
our @ISA = qw(IO::ScalarArray);     ### also gets us new_tie :-)

=head1 NAME

IO::Lines - IO:: interface for reading/writing an array of lines

=head1 SYNOPSIS

    use IO::Lines;

    ### See IO::ScalarArray for details

=head1 DESCRIPTION

This class implements objects which behave just like FileHandle
(or IO::Handle) objects, except that you may use them to write to
(or read from) an array of lines. C<tiehandle> capable as well.

This is a subclass of L<IO::ScalarArray|IO::ScalarArray>
in which the underlying
array has its data stored in a line-oriented-format: that is,
every element ends in a C<"\n">, with the possible exception of the
final element. This makes C<getline()> I<much> more efficient;
if you plan to do line-oriented reading/printing, you want this class.

The C<print()> method will enforce this rule, so you can print
arbitrary data to the line-array: it will break the data at
newlines appropriately.

See L<IO::ScalarArray> for full usage and warnings.

=cut

#------------------------------
#
# getline
#
# Instance method, override.
# Return the next line, or undef on end of data.
# Can safely be called in an array context.
# Currently, lines are delimited by "\n".
#
# The read cursor lives in the glob's hash: {Str} is the index of the
# current element of the array {AR}, and {Pos} is the offset inside that
# element at which the next read starts.
#
# If $/ is undef, everything that is left is returned as a single string.
# Croaks if $/ is anything other than "\n" or undef.
#
sub getline {
    my $self = shift;

    ### Only slurp mode and newline mode are implemented:
    (!defined($/) or ($/ eq "\n"))
        or croak 'unsupported $/: must be "\n" or undef';

    ### End of data.  Answer *before* touching the cursor: otherwise every
    ### call at EOF pushes {Str} further past the end (so lines appended by
    ### a later print() would be skipped), and slurp mode would hand back
    ### '' forever instead of undef, turning
    ###     while (defined($_ = $fh->getline)) { ... }
    ### into an endless loop.  A plain "undef" (not a bare return) is used
    ### on purpose, so list-context callers keep getting one element.
    return undef if (*$self->{Str} > $#{*$self->{AR}});

    ### Slurp mode: glue together whatever remains.
    if (!defined $/) {
        return join('', $self->_getlines_for_newlines);
    }

    ### Line mode: take the current element and advance...
    my $line = *$self->{AR}[*$self->{Str}++];
    if (*$self->{Pos}) {                ### partial line...
        $line = substr($line, *$self->{Pos});
        *$self->{Pos} = 0;
    }
    return $line;                       ### ...else it was a full line
}

#------------------------------
#
# getlines
#
# Instance method, override.
# Return an array comprised of the remaining lines, or () on end of data.
# Must be called in an array context.
# Currently, lines are delimited by "\n".
#
# When $/ is "\n" the fast private reader is used; for any other value
# (including undef) the call is handed to the parent class.
# Croaks when called in scalar context.
#
sub getlines {
    my $self = shift;
    wantarray or croak("can't call getlines in scalar context!");

    if ((defined $/) and ($/ eq "\n")) {
        return $self->_getlines_for_newlines(@_);
    }
    else {         ### slow but steady
        return $self->SUPER::getlines(@_);
    }
}

#------------------------------
#
# _getlines_for_newlines
#
# Instance method, private.
# If $/ is newline, do fast getlines.
# This CAN NOT invoke getline!
#
# Returns the unread tail of the current element (if the cursor sits in
# the middle of one) followed by all remaining elements, and leaves the
# cursor just past the last element with a zero offset.
#
sub _getlines_for_newlines {
    my $self = shift;
    my ($rArray, $Str, $Pos) = @{*$self}{ qw( AR Str Pos ) };
    my @partial = ();

    if ($Pos) {                         ### partial line...
        @partial = (substr( $rArray->[ $Str++ ], $Pos ));
        *$self->{Pos} = 0;
    }
    *$self->{Str} = scalar @$rArray;    ### about to exhaust @$rArray
    return (@partial,
            @$rArray[ $Str .. $#$rArray ]);    ### remaining full lines...
}

#------------------------------
#
# print ARGS...
#
# Instance method, override.
# Print ARGS to the underlying line array.
#
# ARGS are concatenated, followed by the output record separator $\ when
# it is set, and the result is broken after every "\n" so that each array
# element holds at most one line.  If the last element already in the
# array does not end in "\n", the first new chunk is appended to it.
# Always returns 1.  Croaks if $\ is neither undef nor "\n".
#
sub print {
    if (defined $\ && $\ ne "\n") {
        croak 'unsupported $\: must be "\n" or undef';
    }

    my $self = shift;
    ### print STDERR "\n[[ARRAY WAS...\n", @{*$self->{AR}}, "<<EOF>>\n";

    ### Perl does not apply $\ on behalf of a tied handle or a method call,
    ### so the one value we accept ("\n") has to be added here; it used to
    ### be validated and then silently dropped.
    my @lines = split /^/, join('', @_, (defined($\) ? $\ : ''));
    @lines or return 1;

    ### Did the previous print not end with a newline?
    ### If so, append first line:
    if (@{*$self->{AR}} and (*$self->{AR}[-1] !~ /\n\z/)) {
        *$self->{AR}[-1] .= shift @lines;
    }
    push @{*$self->{AR}}, @lines;       ### add the remainder
    ### print STDERR "\n[[ARRAY IS NOW...\n", @{*$self->{AR}}, "<<EOF>>\n";
    1;
}

#------------------------------
1;

__END__

=head1 VERSION

$Id: Lines.pm,v 1.3 2005/02/10 21:21:53 dfs Exp $

=head1 AUTHOR

Eryq (F<eryq@zeegee.com>).
President, ZeeGee Software Inc (F<http://www.zeegee.com>).

=head1 CONTRIBUTORS

Dianne Skoll (F<dfs@roaringpenguin.com>).

=head1 COPYRIGHT & LICENSE

Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut
WrapTie.pm 0000444 00000034574 15160667122 0006476 0 ustar 00
package IO::WrapTie;

use strict;
use Exporter;

# Inheritance, exporting, and package version:
our @ISA     = qw(Exporter);
our @EXPORT  = qw(wraptie);
our $VERSION = '2.113';

# Function, exported.
# Hands all of its arguments (slave class, then the tie arguments) to
# IO::WrapTie::Master->new and returns the resulting master handle.
sub wraptie {
    return IO::WrapTie::Master->new(@_);
}

# Class method; BACKWARDS-COMPATIBILITY ONLY!
# Same as wraptie(), once the invocant has been discarded.
sub new {
    shift;                              ### the class name is not needed
    return IO::WrapTie::Master->new(@_);
}


#------------------------------------------------------------
package # hide from pause
    IO::WrapTie::Master;
#------------------------------------------------------------

use strict;
use vars qw($AUTOLOAD);
use IO::Handle;

# We inherit from IO::Handle to get methods which invoke i/o operators,
# like print(), on our tied handle:
our @ISA = qw(IO::Handle);

#------------------------------
# new SLAVE, TIEARGS...
#------------------------------
# Create a new subclass of IO::Handle which...
#
#   (1) Handles i/o OPERATORS because it is tied to an instance of
#       an i/o-like class, like IO::Scalar.
#
#   (2) Handles i/o METHODS by delegating them to that same tied object!.
#
# Arguments are the slave class (e.g., IO::Scalar), followed by all
# the arguments normally sent into that class's C<TIEHANDLE> method.
# In other words, much like the arguments to tie().  :-)
#
# NOTE:
# The thing $x we return must be a BLESSED REF, for ($x->print()).
# The underlying symbol must be a FILEHANDLE, for (print $x "foo").
# It has to have a way of getting to the "real" back-end object...
#
sub new {
    my $master = shift;                 ### class the handle ends up in
    my $slave  = shift;                 ### class to tie the handle to

    my $handle = IO::Handle->new;       ### a fresh handle to carry the tie
    tie *$handle, $slave, @_;           ### invokes the slave's TIEHANDLE
    return bless $handle, $master;      ### re-bless: this is the master
}

#------------------------------
# AUTOLOAD
#------------------------------
# Delegate method invocations on the master to the underlying slave.
#
# Whatever method was called on the master (its name arrives in
# $AUTOLOAD) is re-sent, with the same arguments, to the object the
# master's glob is tied to.  The slave's return value is passed back.
#
sub AUTOLOAD {
    my $method = $AUTOLOAD;
    $method =~ s/.*:://;                ### strip the package qualifier
    return if ($method eq 'DESTROY');   ### never forward destruction
    my $self = shift;

    ### Forward the argument LIST itself.  This used to pass \@_, so every
    ### delegated method received one array reference in place of its
    ### real arguments.
    tied(*$self)->$method(@_);
}

#------------------------------
# PRELOAD
#------------------------------
# Utility.
#
# Most methods like print(), getline(), etc. which work on the tied object
# via Perl's i/o operators (like 'print') are inherited from IO::Handle.
#
# Other methods, like seek() and sref(), we must delegate ourselves.
# AUTOLOAD takes care of these.
#
# However, it may be necessary to preload delegators into your
# own class. PRELOAD will do this.
#
# Class method.  For every method name given, installs a sub of that name
# into the invoking class which forwards the call (and its arguments) to
# the tied slave object.
#
sub PRELOAD {
    my $class = shift;
    foreach my $name (@_) {
        ### A closure per name, installed through the symbol table, instead
        ### of compiling source text with a string eval whose failure went
        ### unnoticed.
        no strict 'refs';
        *{"${class}::${name}"} = sub { my $s = shift; tied(*$s)->$name(@_) };
    }
}

# Preload delegators for some standard methods which we can't simply
# inherit from IO::Handle... for example, some IO::Handle methods
# assume that there is an underlying file descriptor.
#
IO::WrapTie::Master->PRELOAD(
    qw(open opened close read clearerr eof seek tell setpos getpos)
);


#------------------------------------------------------------
package # hide from pause
    IO::WrapTie::Slave;
#------------------------------------------------------------
# Teeny private class providing a new_tie constructor...
#
# HOW IT ALL WORKS:
#
# Slaves inherit from this class.
#
# When you send a new_tie() message to a tie-slave class (like IO::Scalar),
# it first determines what class should provide its master, via TIE_MASTER.
# In this case, IO::Scalar->TIE_MASTER would return IO::Scalar::Master.
# Then, we create a new master (an IO::Scalar::Master) with the same args
# sent to new_tie.
#
# In general, the new() method of the master is inherited directly
# from IO::WrapTie::Master.
#
sub new_tie {
    my $self = shift;
    return $self->TIE_MASTER->new($self, @_);    ### e.g., IO::Scalar::Master->new(@_)
}

# Default class method for new_tie().
# All your tie-slave class (like IO::Scalar) has to do is override this
# method with a method that returns the name of an appropriate "master"
# class for tying that slave.
#
# Returns the name of the master class, as a plain string.
sub TIE_MASTER {
    return 'IO::WrapTie::Master';
}

#------------------------------
1;
__END__


package IO::WrapTie;      ### for doc generator

=head1 NAME

IO::WrapTie - wrap tieable objects in IO::Handle interface

I<This is currently Alpha code, released for comments.
Please give me your feedback!>

=head1 SYNOPSIS

First of all, you'll need tie(), so:

    require 5.004;

I<Function interface (experimental).>
Use this with any existing class...

    use IO::WrapTie;
    use FooHandle;                  ### implements TIEHANDLE interface

    ### Suppose we want a "FooHandle->new(&FOO_RDWR, 2)".
    ### We can instead say...

    $FH = wraptie('FooHandle', &FOO_RDWR, 2);

    ### Now we can use...
    print $FH "Hello, ";            ### traditional operator syntax...
    $FH->print("world!\n");         ### ...and OO syntax as well!

I<OO interface (preferred).>
You can inherit from the L<IO::WrapTie/"Slave"> mixin to get a
nifty C<new_tie()> constructor...

    #------------------------------
    package FooHandle;                        ### a class which can TIEHANDLE

    use IO::WrapTie;
    @ISA = qw(IO::WrapTie::Slave);            ### inherit new_tie()
    ...


    #------------------------------
    package main;

    $FH = FooHandle->new_tie(&FOO_RDWR, 2);   ### $FH is an IO::WrapTie::Master
    print $FH "Hello, ";                      ### traditional operator syntax
    $FH->print("world!\n");                   ### OO syntax

See IO::Scalar as an example. It also shows you how to create classes
which work both with and without 5.004.

=head1 DESCRIPTION

Suppose you have a class C<FooHandle>, where...

=over 4

=item *

C<FooHandle> does not inherit from L<IO::Handle>. That is, it performs
file handle-like I/O, but to something other than an underlying
file descriptor. Good examples are L<IO::Scalar> (for printing to a
string) and L<IO::Lines> (for printing to an array of lines).

=item *

C<FooHandle> implements the C<TIEHANDLE> interface (see L<perltie>).
That is, it provides methods C<TIEHANDLE>, C<GETC>, C<PRINT>, C<PRINTF>,
C<READ>, and C<READLINE>.

=item *

C<FooHandle> implements the traditional OO interface of
L<FileHandle> and L<IO::Handle>.
i.e., it contains methods like C<getline>, C<read>, C<print>, C<seek>, C<tell>, C<eof>, etc. =back Normally, users of your class would have two options: =over 4 =item * B<Use only OO syntax,> and forsake named I/O operators like C<print>. =item * B<Use with tie,> and forsake treating it as a first-class object (i.e., class-specific methods can only be invoked through the underlying object via C<tied>... giving the object a "split personality"). =back But now with L<IO::WrapTie>, you can say: $WT = wraptie('FooHandle', &FOO_RDWR, 2); $WT->print("Hello, world\n"); ### OO syntax print $WT "Yes!\n"; ### Named operator syntax too! $WT->weird_stuff; ### Other methods! And if you're authoring a class like C<FooHandle>, just have it inherit from C<IO::WrapTie::Slave> and that first line becomes even prettier: $WT = FooHandle->new_tie(&FOO_RDWR, 2); B<The bottom line:> now, almost any class can look and work exactly like an L<IO::Handle> and be used both with OO and non-OO file handle syntax. =head1 HOW IT ALL WORKS =head2 The data structures Consider this example code, using classes in this distribution: use IO::Scalar; use IO::WrapTie; $WT = wraptie('IO::Scalar',\$s); print $WT "Hello, "; $WT->print("world!\n"); In it, the C<wraptie> function creates a data structure as follows: * $WT is a blessed reference to a tied filehandle $WT glob; that glob is tied to the "Slave" object. | * You would do all your i/o with $WT directly. | | | ,---isa--> IO::WrapTie::Master >--isa--> IO::Handle V / .-------------. | | | | * Perl i/o operators work on the tied object, | "Master" | invoking the C<TIEHANDLE> methods. | | * Method invocations are delegated to the tied | | slave. `-------------' | tied(*$WT) | .---isa--> IO::WrapTie::Slave V / .-------------. | | | "Slave" | * Instance of FileHandle-like class which doesn't | | actually use file descriptors, like IO::Scalar. | IO::Scalar | * The slave can be any kind of object. | | * Must implement the C<TIEHANDLE> interface. 
`-------------' I<NOTE:> just as an L<IO::Handle> is really just a blessed reference to a I<traditional> file handle glob. So also, an C<IO::WrapTie::Master> is really just a blessed reference to a file handle glob I<which has been tied to some "slave" class.> =head2 How C<wraptie> works =over 4 =item 1. The call to function C<wraptie(SLAVECLASS, TIEARGS...)> is passed onto C<IO::WrapTie::Master::new()>. Note that class C<IO::WrapTie::Master> is a subclass of L<IO::Handle>. =item 2. The C<< IO::WrapTie::Master->new >> method creates a new L<IO::Handle> object, re-blessed into class C<IO::WrapTie::Master>. This object is the I<master>, which will be returned from the constructor. At the same time... =item 3. The C<new> method also creates the I<slave>: this is an instance of C<SLAVECLASS> which is created by tying the master's L<IO::Handle> to C<SLAVECLASS> via C<tie>. This call to C<tie> creates the slave in the following manner: =item 4. Class C<SLAVECLASS> is sent the message C<TIEHANDLE>; it will usually delegate this to C<< SLAVECLASS->new(TIEARGS) >>, resulting in a new instance of C<SLAVECLASS> being created and returned. =item 5. Once both master and slave have been created, the master is returned to the caller. =back =head2 How I/O operators work (on the master) Consider using an i/o operator on the master: print $WT "Hello, world!\n"; Since the master C<$WT> is really a C<blessed> reference to a glob, the normal Perl I/O operators like C<print> may be used on it. They will just operate on the symbol part of the glob. Since the glob is tied to the slave, the slave's C<PRINT> method (part of the C<TIEHANDLE> interface) will be automatically invoked. If the slave is an L<IO::Scalar>, that means L<IO::Scalar/"PRINT"> will be invoked, and that method happens to delegate to the C<print> method of the same class. So the I<real> work is ultimately done by L<IO::Scalar/"print">. 
=head2 How methods work (on the master) Consider using a method on the master: $WT->print("Hello, world!\n"); Since the master C<$WT> is blessed into the class C<IO::WrapTie::Master>, Perl first attempts to find a C<print> method there. Failing that, Perl next attempts to find a C<print> method in the super class, L<IO::Handle>. It just so happens that there I<is> such a method; that method merely invokes the C<print> I/O operator on the self object... and for that, see above! But let's suppose we're dealing with a method which I<isn't> part of L<IO::Handle>... for example: my $sref = $WT->sref; In this case, the intuitive behavior is to have the master delegate the method invocation to the slave (now do you see where the designations come from?). This is indeed what happens: C<IO::WrapTie::Master> contains an C<AUTOLOAD> method which performs the delegation. So: when C<sref> can't be found in L<IO::Handle>, the C<AUTOLOAD> method of C<IO::WrapTie::Master> is invoked, and the standard behavior of delegating the method to the underlying slave (here, an L<IO::Scalar>) is done. Sometimes, to get this to work properly, you may need to create a subclass of C<IO::WrapTie::Master> which is an effective master for I<your> class, and do the delegation there. =head1 NOTES B<Why not simply use the object's OO interface?> Because that means forsaking the use of named operators like C<print>, and you may need to pass the object to a subroutine which will attempt to use those operators: $O = FooHandle->new(&FOO_RDWR, 2); $O->print("Hello, world\n"); ### OO syntax is okay, BUT.... sub nope { print $_[0] "Nope!\n" } X nope($O); ### ERROR!!! (not a glob ref) B<Why not simply use tie()?> Because (1) you have to use C<tied> to invoke methods in the object's public interface (yuck), and (2) you may need to pass the tied symbol to another subroutine which will attempt to treat it in an OO-way... 
and that will break it: tie *T, 'FooHandle', &FOO_RDWR, 2; print T "Hello, world\n"; ### Operator is okay, BUT... tied(*T)->other_stuff; ### yuck! AND... sub nope { shift->print("Nope!\n") } X nope(\*T); ### ERROR!!! (method "print" on unblessed ref) B<Why a master and slave?> Why not simply write C<FooHandle> to inherit from L<IO::Handle>? I tried this, with an implementation similar to that of L<IO::Socket>. The problem is that I<the whole point is to use this with objects that don't have an underlying file/socket descriptor.> Subclassing L<IO::Handle> will work fine for the OO stuff, and fine with named operators I<if> you C<tie>... but if you just attempt to say: $IO = FooHandle->new(&FOO_RDWR, 2); print $IO "Hello!\n"; you get a warning from Perl like: Filehandle GEN001 never opened because it's trying to do system-level I/O on an (unopened) file descriptor. To avoid this, you apparently have to C<tie> the handle... which brings us right back to where we started! At least the L<IO::WrapTie> mixin lets us say: $IO = FooHandle->new_tie(&FOO_RDWR, 2); print $IO "Hello!\n"; and so is not I<too> bad. C<:-)> =head1 WARNINGS Remember: this stuff is for doing L<FileHandle>-like I/O on things I<without underlying file descriptors>. If you have an underlying file descriptor, you're better off just inheriting from L<IO::Handle>. B<Be aware that new_tie() always returns an instance of a kind of IO::WrapTie::Master...> it does B<not> return an instance of the I/O class you're tying to! Invoking some methods on the master object causes C<AUTOLOAD> to delegate them to the slave object... so it I<looks> like you're manipulating a C<FooHandle> object directly, but you're not. I have not explored all the ramifications of this use of C<tie>. I<Here there be dragons>. =head1 AUTHOR Eryq (F<eryq@zeegee.com>). President, ZeeGee Software Inc (F<http://www.zeegee.com>). =head1 CONTRIBUTORS Dianne Skoll (F<dfs@roaringpenguin.com>). 
=head1 COPYRIGHT & LICENSE

Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut
Wrap.pm 0000444 00000021125 15160667122 0006020 0 ustar 00
package IO::Wrap;

use strict;
use Exporter;
use FileHandle;
use Carp;

our $VERSION = '2.113';
our @ISA = qw(Exporter);
our @EXPORT = qw(wraphandle);


#------------------------------
# wraphandle RAW
#------------------------------
# Function, exported.
# Functional front end to the constructor: returns whatever
# IO::Wrap->new(RAW) returns.
#
sub wraphandle {
    my $raw = shift;
    return IO::Wrap->new($raw);         ### was indirect-object "new IO::Wrap"
}

#------------------------------
# new STREAM
#------------------------------
# Class method, constructor.
# STREAM may be a handle name (plain string), a glob reference, or an
# object.  A string is first turned into a glob reference.  Glob
# references -- and FileHandle objects when FileHandle::read is not
# defined -- are wrapped: the result is a reference to the stream,
# blessed into the invoking class.  Anything else is returned untouched.
#
sub new {
    my ($class, $stream) = @_;
    no strict 'refs';                   ### symbolic glob lookup below

    ### Convert raw scalar to globref:
    ref($stream) or $stream = \*$stream;

    ### Wrap globref and incomplete objects:
    if ((ref($stream) eq 'GLOB') or                     ### globref
        ((ref($stream) eq 'FileHandle') && !defined(&FileHandle::read))) {
        return bless \$stream, $class;
    }
    return $stream;                     ### already okay!
}

#------------------------------
# I/O methods...
#------------------------------
# In every method below, $$self is the wrapped handle, and the result is
# that of the Perl built-in of the same name applied to it.

# Close the wrapped handle.
sub close {
    my $self = shift;
    return close($$self);
}

# Return the file descriptor number of the wrapped handle.
sub fileno {
    my $self = shift;
    my $fh = $$self;
    return fileno($fh);
}

# Read one record from the wrapped handle (always scalar context).
sub getline {
    my $self = shift;
    my $fh = $$self;
    return scalar(<$fh>);
}

# Read all remaining records; croaks unless called in list context.
sub getlines {
    my $self = shift;
    wantarray or croak("Can't call getlines in scalar context!");
    my $fh = $$self;
    return <$fh>;
}

# Print the arguments to the wrapped handle.
sub print {
    my $self = shift;
    print { $$self } @_;
}

# read BUF, LEN, [OFFSET]
# Read up to LEN characters into BUF (modified in place through the @_
# alias).  The optional OFFSET of the IO::Handle interface used to be
# dropped silently, so the data always landed at the start of BUF; it is
# now honoured and defaults to 0, which is what the two-argument form did.
sub read {
    my $self = shift;
    return read($$self, $_[0], $_[1], (defined($_[2]) ? $_[2] : 0));
}

# seek POSITION, WHENCE
sub seek {
    my $self = shift;
    return seek($$self, $_[0], $_[1]);
}

# Return the current position in the wrapped handle.
sub tell {
    my $self = shift;
    return tell($$self);
}

1;
__END__

=head1 NAME

IO::Wrap - Wrap raw filehandles in the IO::Handle interface

=head1 SYNOPSIS

    use strict;
    use warnings;
    use IO::Wrap;

    # this is a fairly senseless use case as IO::Handle already does this.
my $wrap_fh = IO::Wrap->new(\*STDIN); my $line = $wrap_fh->getline(); # Do stuff with any kind of filehandle (including a bare globref), or # any kind of blessed object that responds to a print() message. # already have a globref? a FileHandle? a scalar filehandle name? $wrap_fh = IO::Wrap->new($some_unknown_thing); # At this point, we know we have an IO::Handle-like object! YAY $wrap_fh->print("Hey there!"); You can also do this using a convenience wrapper function use strict; use warnings; use IO::Wrap qw(wraphandle); # this is a fairly senseless use case as IO::Handle already does this. my $wrap_fh = wraphandle(\*STDIN); my $line = $wrap_fh->getline(); # Do stuff with any kind of filehandle (including a bare globref), or # any kind of blessed object that responds to a print() message. # already have a globref? a FileHandle? a scalar filehandle name? $wrap_fh = wraphandle($some_unknown_thing); # At this point, we know we have an IO::Handle-like object! YAY $wrap_fh->print("Hey there!"); =head1 DESCRIPTION Let's say you want to write some code which does I/O, but you don't want to force the caller to provide you with a L<FileHandle> or L<IO::Handle> object. You want them to be able to say: do_stuff(\*STDOUT); do_stuff('STDERR'); do_stuff($some_FileHandle_object); do_stuff($some_IO_Handle_object); And even: do_stuff($any_object_with_a_print_method); Sure, one way to do it is to force the caller to use C<tiehandle()>. But that puts the burden on them. Another way to do it is to use B<IO::Wrap>. Clearly, when wrapping a raw external filehandle (like C<\*STDOUT>), I didn't want to close the file descriptor when the wrapper object is destroyed; the user might not appreciate that! Hence, there's no C<DESTROY> method in this class. 
When wrapping a L<FileHandle> object, however, I believe that Perl will invoke the C<FileHandle::DESTROY> when the last reference goes away, so in that case, the filehandle is closed if the wrapped L<FileHandle> really was the last reference to it. =head1 FUNCTIONS L<IO::Wrap> makes the following functions available. =head2 wraphandle # wrap a filehandle glob my $fh = wraphandle(\*STDIN); # wrap a raw filehandle glob by name $fh = wraphandle('STDIN'); # wrap a handle in an object $fh = wraphandle('Class::HANDLE'); # wrap a blessed FileHandle object use FileHandle; my $fho = FileHandle->new("/tmp/foo.txt", "r"); $fh = wraphandle($fho); # wrap any other blessed object that shares IO::Handle's interface $fh = wraphandle($some_object); This function is simply a wrapper to the L<IO::Wrap/"new"> constructor method. =head1 METHODS L<IO::Wrap> implements the following methods. =head2 close $fh->close(); The C<close> method will attempt to close the system file descriptor. For a more complete description, read L<perlfunc/close>. =head2 fileno my $int = $fh->fileno(); The C<fileno> method returns the file descriptor for the wrapped filehandle. See L<perlfunc/fileno> for more information. =head2 getline my $data = $fh->getline(); The C<getline> method mimics the function by the same name in L<IO::Handle>. It's like calling C<< my $data = <$fh>; >> but only in scalar context. =head2 getlines my @data = $fh->getlines(); The C<getlines> method mimics the function by the same name in L<IO::Handle>. It's like calling C<< my @data = <$fh>; >> but only in list context. Calling this method in scalar context will result in a croak. 
=head2 new # wrap a filehandle glob my $fh = IO::Wrap->new(\*STDIN); # wrap a raw filehandle glob by name $fh = IO::Wrap->new('STDIN'); # wrap a handle in an object $fh = IO::Wrap->new('Class::HANDLE'); # wrap a blessed FileHandle object use FileHandle; my $fho = FileHandle->new("/tmp/foo.txt", "r"); $fh = IO::Wrap->new($fho); # wrap any other blessed object that shares IO::Handle's interface $fh = IO::Wrap->new($some_object); The C<new> constructor method takes in a single argument and decides whether or not to wrap it based on what it seems to be. A raw scalar file handle name, like C<"STDOUT"> or C<"Class::HANDLE"> can be wrapped, returning an L<IO::Wrap> object instance. A raw filehandle glob, like C<\*STDOUT> can also be wrapped, returning an L<IO::Wrap> object instance. A blessed L<FileHandle> object can also be wrapped. This is a special case where an L<IO::Wrap> object instance will only be returned in the case that your L<FileHandle> object doesn't support the C<read> method. Also, any other kind of blessed object that conforms to the L<IO::Handle> interface can be passed in. In this case, you just get back that object. In other words, we only wrap it into an L<IO::Wrap> object when what you've supplied doesn't already conform to the L<IO::Handle> interface. If you get back an L<IO::Wrap> object, it will obey a basic subset of the C<IO::> interface. It will do so with object B<methods>, not B<operators>. =head3 CAVEATS This module does not allow you to wrap filehandle names which are given as strings that lack the package they were opened in. That is, if a user opens FOO in package Foo, they must pass it to you either as C<\*FOO> or as C<"Foo::FOO">. However, C<"STDIN"> and friends will work just fine. =head2 print $fh->print("Some string"); $fh->print("more", " than one", " string"); The C<print> method will attempt to print a string or list of strings to the filehandle. For a more complete description, read L<perlfunc/print>. 
=head2 read my $buffer; # try to read 30 chars into the buffer starting at the # current cursor position. my $num_chars_read = $fh->read($buffer, 30); The L<read> method attempts to read a number of characters, starting at the filehandle's current cursor position. It returns the number of characters actually read. See L<perlfunc/read> for more information. =head2 seek use Fcntl qw(:seek); # import the SEEK_CUR, SEEK_SET, SEEK_END constants # seek to the position in bytes $fh->seek(0, SEEK_SET); # seek to the position in bytes from the current position $fh->seek(22, SEEK_CUR); # seek to the EOF plus bytes $fh->seek(0, SEEK_END); The C<seek> method will attempt to set the cursor to a given position in bytes for the wrapped file handle. See L<perlfunc/seek> for more information. =head2 tell my $bytes = $fh->tell(); The C<tell> method will attempt to return the current position of the cursor in bytes for the wrapped file handle. See L<perlfunc/tell> for more information. =head1 AUTHOR Eryq (F<eryq@zeegee.com>). President, ZeeGee Software Inc (F<http://www.zeegee.com>). =head1 CONTRIBUTORS Dianne Skoll (F<dfs@roaringpenguin.com>). =head1 COPYRIGHT & LICENSE Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut ScalarArray.pm 0000444 00000040077 15160667122 0007322 0 ustar 00 package IO::ScalarArray; use strict; use Carp; use IO::Handle; # The package version, both in 1.23 style *and* usable by MakeMaker: our $VERSION = '2.113'; # Inheritance: our @ISA = qw(IO::Handle); require IO::WrapTie and push @ISA, 'IO::WrapTie::Slave' if ($] >= 5.004); =head1 NAME IO::ScalarArray - IO:: interface for reading/writing an array of scalars =head1 SYNOPSIS Perform I/O on strings, using the basic OO interface... 
use IO::ScalarArray; @data = ("My mes", "sage:\n"); ### Open a handle on an array, and append to it: $AH = new IO::ScalarArray \@data; $AH->print("Hello"); $AH->print(", world!\nBye now!\n"); print "The array is now: ", @data, "\n"; ### Open a handle on an array, read it line-by-line, then close it: $AH = new IO::ScalarArray \@data; while (defined($_ = $AH->getline)) { print "Got line: $_"; } $AH->close; ### Open a handle on an array, and slurp in all the lines: $AH = new IO::ScalarArray \@data; print "All lines:\n", $AH->getlines; ### Get the current position (either of two ways): $pos = $AH->getpos; $offset = $AH->tell; ### Set the current position (either of two ways): $AH->setpos($pos); $AH->seek($offset, 0); ### Open an anonymous temporary array: $AH = new IO::ScalarArray; $AH->print("Hi there!"); print "I printed: ", @{$AH->aref}, "\n"; ### get at value Don't like OO for your I/O? No problem. Thanks to the magic of an invisible tie(), the following now works out of the box, just as it does with IO::Handle: use IO::ScalarArray; @data = ("My mes", "sage:\n"); ### Open a handle on an array, and append to it: $AH = new IO::ScalarArray \@data; print $AH "Hello"; print $AH ", world!\nBye now!\n"; print "The array is now: ", @data, "\n"; ### Open a handle on a string, read it line-by-line, then close it: $AH = new IO::ScalarArray \@data; while (<$AH>) { print "Got line: $_"; } close $AH; ### Open a handle on a string, and slurp in all the lines: $AH = new IO::ScalarArray \@data; print "All lines:\n", <$AH>; ### Get the current position (WARNING: requires 5.6): $offset = tell $AH; ### Set the current position (WARNING: requires 5.6): seek $AH, $offset, 0; ### Open an anonymous temporary scalar: $AH = new IO::ScalarArray; print $AH "Hi there!"; print "I printed: ", @{$AH->aref}, "\n"; ### get at value And for you folks with 1.x code out there: the old tie() style still works, though this is I<unnecessary and deprecated>: use IO::ScalarArray; ### Writing to a scalar... 
my @a; tie *OUT, 'IO::ScalarArray', \@a; print OUT "line 1\nline 2\n", "line 3\n"; print "Array is now: ", @a, "\n" ### Reading and writing an anonymous scalar... tie *OUT, 'IO::ScalarArray'; print OUT "line 1\nline 2\n", "line 3\n"; tied(OUT)->seek(0,0); while (<OUT>) { print "Got line: ", $_; }

=head1 DESCRIPTION

This class is part of the IO::Stringy distribution;
see L<IO::Stringy> for change log and general information.

The IO::ScalarArray class implements objects which behave just like
IO::Handle (or FileHandle) objects, except that you may use them
to write to (or read from) arrays of scalars. Logically, an
array of scalars defines an in-core "file" whose contents are
the concatenation of the scalars in the array. The handles created by
this class are automatically C<tiehandle>d (though please see L<"WARNINGS">
for information relevant to your Perl version).

For writing large amounts of data with individual print() statements,
this class is likely to be more efficient than IO::Scalar.

Basically, this:

    my @a;
    $AH = new IO::ScalarArray \@a;
    $AH->print("Hel", "lo, ");         ### OO style
    $AH->print("world!\n");            ### ditto

Or this:

    my @a;
    $AH = new IO::ScalarArray \@a;
    print $AH "Hel", "lo, ";           ### non-OO style
    print $AH "world!\n";              ### ditto

Causes @a to be set to the following array of 3 strings:

    ( "Hel" ,
      "lo, " ,
      "world!\n" )

See L<IO::Scalar> and compare with this class.

=head1 PUBLIC INTERFACE

=head2 Construction

=over 4

=cut

#------------------------------

=item new [ARGS...]

I<Class method.>
Build a new array handle and hand ARGS (if any) straight to open().
With no ARGS the handle is opened on a private, anonymous array.
May be called on a class name or on an existing object.

=cut

sub new {
    my ($invocant, @open_args) = @_;

    ### Accept either "Class->new" or "$object->new":
    my $class = ref($invocant) || $invocant;

    ### The object is a blessed reference to a fresh anonymous glob;
    ### tying that glob to the object itself lets plain filehandle
    ### operators be routed to the tie methods of this class.
    my $handle = bless \do { local *FH }, $class;
    tie *$handle, $class, $handle;

    $handle->open(@open_args);    ### open on anonymous by default
    return $handle;
}

### Destructor: detach from the underlying array.
sub DESTROY {
    my $self = shift;
    $self->close;
}


#------------------------------

=item open [ARRAYREF]

I<Instance method.>
Open the array handle on a new array, pointed to by ARRAYREF.
If no ARRAYREF is given, a "private" array is created to hold
the file data. Returns the self object on success, undefined on error.

=cut

sub open {
    my ($self, $aref) = @_;

    ### Sanity:
    ### (with no argument, fall back to a fresh private array;
    ###  anything that is not a plain ARRAY ref is rejected)
    defined($aref) or do {my @a; $aref = \@a};
    (ref($aref) eq "ARRAY") or croak "open needs a ref to a array";

    ### Setup:
    ### (rewind to string 0 / offset 0, then remember the array in the
    ###  glob's hash under the AR key, where the other methods read it)
    $self->setpos([0,0]);
    *$self->{AR} = $aref;
    $self;
}

#------------------------------

=item opened

I<Instance method.>
Is the array handle opened on something?

=cut

### Returns the stored array reference itself (true while open).
sub opened {
    *{shift()}->{AR};
}

#------------------------------

=item close

I<Instance method.>
Disassociate the array handle from its underlying array.
Done automatically on destroy.

=cut

sub close {
    my $self = shift;
    ### Emptying the glob's hash drops AR, Str and Pos in one go:
    %{*$self} = ();
    1;
}

=back

=cut



#==============================

=head2 Input and output

=over 4

=cut

#------------------------------

=item flush

I<Instance method.>
No-op, provided for OO compatibility.

=cut

### "0 but true" is numerically zero yet true in boolean context.
sub flush { "0 but true" }

#------------------------------

=item fileno

I<Instance method.>
No-op, returns undef

=cut

### Empty body: yields undef in scalar context, the empty list in list context.
sub fileno { }

#------------------------------

=item getc

I<Instance method.>
Return the next character, or undef if none remain.
This does a read(1), which is somewhat costly.

=cut

sub getc {
    my $buf = '';
    ### A false byte count from read() means nothing was read:
    ($_[0]->read($buf, 1) ? $buf : undef);
}

#------------------------------

=item getline

I<Instance method.>
Return the next line, or undef on end of data.
Can safely be called in an array context.
Currently, lines are delimited by "\n".

=cut

sub getline {
    my $self = shift;
    my ($str, $line) = (undef, '');


    ### Minimal impact implementation!
    ### We do the fast thing (no regexps) if using the
    ### classic input record separator.
    ### NOTE(review): the "\n" case below does use a regexp match,
    ### so the remark above looks stale.

    ### Case 1: $/ is undef: slurp all...
    if (!defined($/)) {

        return undef if ($self->eof);

        ### Get the rest of the current string, followed by remaining strings:
        my $ar = *$self->{AR};
        my @slurp = (
                     substr($ar->[*$self->{Str}], *$self->{Pos}),
                     @$ar[(1 + *$self->{Str}) .. $#$ar ]
                     );

        ### Seek to end:
        $self->_setpos_to_eof;
        return join('', @slurp);
    }

    ### Case 2: $/ is "\n":
    elsif ($/ eq "\012") {

        ### Until we hit EOF (or exited because of a found line):
        ### (a line may span several array elements, so $line is
        ###  accumulated across iterations)
        until ($self->eof) {
            ### If at end of current string, go fwd to next one (won't be EOF):
            if ($self->_eos) {++*$self->{Str}, *$self->{Pos}=0};

            ### Get ref to current string in array, and set internal pos mark:
            $str = \(*$self->{AR}[*$self->{Str}]); ### get current string
            pos($$str) = *$self->{Pos};            ### start matching from here

            ### Get from here to either \n or end of string, and add to line:
            ### ($1 = text, $2 = terminator or '', $3 = "\n" only if one was found)
            $$str =~ m/\G(.*?)((\n)|\Z)/g;         ### match to 1st \n or EOS
            $line .= $1.$2;                        ### add it
            *$self->{Pos} += length($1.$2);        ### move fwd by len matched
            return $line if $3;                    ### done, got line with "\n"
        }
        return ($line eq '') ? undef : $line;  ### return undef if EOF
    }

    ### Case 3: $/ is ref to int. Bail out.
    elsif (ref($/)) {
        croak '$/ given as a ref to int; currently unsupported';
    }

    ### Case 4: $/ is either "" (paragraphs) or something weird...
    ###         Bail for now.
    else {
        croak '$/ as given is currently unsupported';
    }
}

#------------------------------

=item getlines

I<Instance method.>
Get all remaining lines.
It will croak() if accidentally called in a scalar context.

=cut

sub getlines {
    my $self = shift;
    wantarray or croak("can't call getlines in scalar context!");
    my ($line, @lines);
    ### getline() returns undef at end of data, which ends the loop:
    push @lines, $line while (defined($line = $self->getline));
    @lines;
}

#------------------------------

=item print ARGS...

I<Instance method.>
Print ARGS to the underlying array.

Currently, this always causes a "seek to the end of the array"
and generates a new array entry. This may change in the future.

=cut

sub print {
    my $self = shift;
    ### All ARGS are joined into ONE new element; the output record
    ### separator $\ is appended when it is defined.
    push @{*$self->{AR}}, join('', @_) . (defined($\) ? $\ : "");      ### add the data
    $self->_setpos_to_eof;
    1;
}

#------------------------------

=item read BUF, NBYTES, [OFFSET];

I<Instance method.>
Read some bytes from the array.
Returns the number of bytes actually read, 0 on end-of-file, undef on error.

=cut

sub read {
    my $self = $_[0];
    ### we must use $_[1] as a ref
    ### ($_[1] aliases the caller's buffer, so assigning to it below
    ###  is what delivers the data to the caller)
    my $n    = $_[2];
    my $off  = $_[3] || 0;

    ### print "getline\n";
    my $justread;
    my $len;

    ### Clear the destination: everything from OFFSET on, or the whole
    ### buffer when no OFFSET was given.
    ($off ? substr($_[1], $off) : $_[1]) = '';

    ### Stop when we have zero bytes to go, or when we hit EOF:
    my @got;
    until (!$n or $self->eof) {
        ### If at end of current string, go forward to next one (won't be EOF):
        if ($self->_eos) {
            ++*$self->{Str};
            *$self->{Pos} = 0;
        }

        ### Get longest possible desired substring of current string:
        $justread = substr(*$self->{AR}[*$self->{Str}], *$self->{Pos}, $n);
        $len = length($justread);
        push @got, $justread;
        $n -= $len;
        *$self->{Pos} += $len;
    }
    $_[1] .= join('', @got);
    return length($_[1])-$off;
}

#------------------------------

=item write BUF, NBYTES, [OFFSET];

I<Instance method.>
Write some bytes into the array: NBYTES bytes of BUF, starting at
OFFSET within BUF (default 0). If NBYTES is omitted, everything from
OFFSET to the end of BUF is written. The data is handed to print(),
so it becomes one new array element. Returns the number of bytes
taken from BUF.

=cut

sub write {
    my $self = $_[0];
    my $n    = $_[2];
    my $off  = $_[3] || 0;

    ### substr() takes (EXPR, OFFSET, LENGTH). The arguments used to be
    ### passed as ($n, $off), i.e. swapped, so a plain write(BUF, NBYTES)
    ### extracted zero bytes starting at position NBYTES.
    my $data = defined($n) ? substr($_[1], $off, $n)
                           : substr($_[1], $off);

    ### substr() yields undef when OFFSET lies beyond the end of BUF;
    ### treat that as "nothing to write".
    $data = '' unless defined($data);

    $n = length($data);
    $self->print($data);
    return $n;
}


=back

=cut



#==============================

=head2 Seeking/telling and other attributes

=over 4

=cut

#------------------------------

=item autoflush

I<Instance method.>
No-op, provided for OO compatibility.

=cut

sub autoflush {}

#------------------------------

=item binmode

I<Instance method.>
No-op, provided for OO compatibility.

=cut

sub binmode {}

#------------------------------

=item clearerr

I<Instance method.> Clear the error and EOF flags. A no-op.

=cut

sub clearerr { 1 }

#------------------------------

=item eof

I<Instance method.> Are we at end of file?

=cut

sub eof {
    my $self  = $_[0];
    my $where = *$self->{Str};          ### index of the current string
    my $last  = $#{*$self->{AR}};       ### index of the last string

    return 0 if ($where < $last);       ### still before the last string
    return 1 if ($where > $last);       ### already beyond the array

    ### On the last string: EOF exactly when that string is used up.
    return (($where == $last) && ($self->_eos));
}

#------------------------------
#
# _eos
#
# I<Instance method, private.>
# True when the offset within the CURRENT string is at or past its end.
#
sub _eos {
    my $self    = $_[0];
    my $current = *$self->{AR}[*$self->{Str}];
    return (*$self->{Pos} >= length($current));
}

#------------------------------

=item seek POS,WHENCE

I<Instance method.>
Seek to a given position in the stream.
WHENCE selects the reference point: 0 (from the start), 1 (from the
current position) or 2 (from the end). Any other WHENCE croaks.
Returns 1.

=cut

sub seek {
    my ($self, $pos, $whence) = @_;

    ### Pick the private helper that matches WHENCE:
    my $seeker = ($whence == 0) ? '_seek_set'
               : ($whence == 1) ? '_seek_cur'
               : ($whence == 2) ? '_seek_end'
               :                  croak "bad seek whence ($whence)";

    $self->$seeker($pos);
    return 1;
}

#------------------------------
#
# _seek_set POS
#
# Instance method, private.
# Seek to $pos relative to start.
# A $pos that is negative, or at/after the end of the data, lands on EOF.
#
sub _seek_set {
    my ($self, $pos) = @_;

    if ($pos >= 0) {
        ### Walk the strings, peeling each one's length off $pos until
        ### the remainder falls inside a string:
        my $count = scalar(@{*$self->{AR}});
        for my $idx (0 .. $count - 1) {
            my $len = length(*$self->{AR}[$idx]);
            return $self->setpos([$idx, $pos]) if ($len > $pos);
            $pos -= $len;
        }
    }

    ### Nothing matched: zoom to EOF.
    return $self->_setpos_to_eof;
}

#------------------------------
#
# _seek_cur POS
#
# Instance method, private.
# Seek to $pos relative to current position.
#
sub _seek_cur {
    my ($self, $pos) = @_;
    my $target = $self->tell + $pos;
    return $self->_seek_set($target);
}

#------------------------------
#
# _seek_end POS
#
# Instance method, private.
# Seek to $pos relative to end.
# We actually seek relative to beginning, which is simple.
#
sub _seek_end {
    my ($self, $pos) = @_;
    my $target = $self->_tell_eof + $pos;
    return $self->_seek_set($target);
}

#------------------------------

=item tell

I<Instance method.>
Return the current position in the stream, as a numeric offset.

=cut

sub tell {
    my $self   = shift;
    my $whole  = *$self->{Str};     ### number of strings fully behind us
    my $offset = 0;

    ### Add up every string that lies entirely before the current one;
    ### undefined elements count as empty.
    for my $idx (0 .. $whole - 1) {
        my $chunk = *$self->{AR}[$idx];
        $chunk = '' unless defined($chunk);
        $offset += length($chunk);
    }

    ### ...plus the offset inside the current, partially consumed string:
    return ($offset += *$self->{Pos});
}

#------------------------------
#
# _tell_eof
#
# Instance method, private.
# Get position of EOF, as a numeric offset.
# This is the total length of the stream, i.e. the sum of the lengths
# of all strings in the array.
#
sub _tell_eof {
    my $self  = shift;
    my $total = 0;
    for my $chunk (@{*$self->{AR}}) {
        $total += length($chunk);
    }
    return $total;
}

#------------------------------

=item setpos POS

I<Instance method.>
Seek to a given position in the array, using the opaque getpos() value.
Don't expect this to be a number.

=cut

sub setpos {
    my ($self, $pos) = @_;
    die "setpos: only use a value returned by getpos!\n"
        unless (ref($pos) eq 'ARRAY');

    ### POS is a [string-index, offset-in-string] pair. The list
    ### assignment stays the final expression so the value handed back
    ### to callers is unchanged.
    (*$self->{Str}, *$self->{Pos}) = @$pos;
}

#------------------------------
#
# _setpos_to_eof
#
# Fast-forward to EOF: one past the last string, offset 0.
#
sub _setpos_to_eof {
    my $self      = shift;
    my $end_index = scalar(@{*$self->{AR}});
    return $self->setpos([$end_index, 0]);
}

#------------------------------

=item getpos

I<Instance method.>
Return the current position in the array, as an opaque value.
Don't expect this to be a number.

=cut

sub getpos {
    my $self = $_[0];
    return [*$self->{Str}, *$self->{Pos}];
}

#------------------------------

=item aref

I<Instance method.>
Return a reference to the underlying array.

=cut

sub aref {
    my $self = shift;
    return *$self->{AR};
}

=back

=cut

#------------------------------
# Tied handle methods...
#------------------------------

### Conventional tiehandle interface:
### When the argument after the class name is already an IO::ScalarArray,
### reuse it as the tied object; otherwise construct a new one from
### the remaining arguments.
sub TIEHANDLE {
    my $candidate = $_[1];
    return $candidate
        if (defined($candidate) && UNIVERSAL::isa($candidate,"IO::ScalarArray"));

    my $class = shift;
    return $class->new(@_);
}

### The remaining hooks forward to the OO methods. @_ is passed through
### untouched so that aliased arguments (the READ buffer) keep working.
sub GETC {
    my $self = shift;
    return $self->getc(@_);
}

sub PRINT {
    my $self = shift;
    return $self->print(@_);
}

sub PRINTF {
    my ($self, $format, @values) = @_;
    return $self->print(sprintf($format, @values));
}

sub READ {
    my $self = shift;
    return $self->read(@_);
}

sub READLINE {
    my $self = shift;
    return wantarray ? $self->getlines(@_) : $self->getline(@_);
}

sub WRITE {
    my $self = shift;
    return $self->write(@_);
}

sub CLOSE {
    my $self = shift;
    return $self->close(@_);
}

sub SEEK {
    my $self = shift;
    return $self->seek(@_);
}

sub TELL {
    my $self = shift;
    return $self->tell(@_);
}

sub EOF {
    my $self = shift;
    return $self->eof(@_);
}

sub BINMODE { 1; }

#------------------------------------------------------------

1;
__END__

# SOME PRIVATE NOTES:
#
#     * The "current position" is the position before the next
#       character to be read/written.
#
#     * Str gives the string index of the current position, 0-based
#
#     * Pos gives the offset within AR[Str], 0-based.
#
#     * Initial pos is [0,0]. After print("Hello"), it is [1,0].

=head1 AUTHOR

Eryq (F<eryq@zeegee.com>).
President, ZeeGee Software Inc (F<http://www.zeegee.com>).

=head1 CONTRIBUTORS

Dianne Skoll (F<dfs@roaringpenguin.com>).

=head1 COPYRIGHT & LICENSE

Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

HTML.pm 0000444 00000046353 15160667122 0005665 0 ustar 00
#---------------------------------------------------------------------
package IO::HTML;
#
# Copyright 2020 Christopher J. Madsen
#
# Author: Christopher J. Madsen <perl@cjmweb.net>
# Created: 14 Jan 2012
#
# This program is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either the
# GNU General Public License or the Artistic License for more details.
#
# ABSTRACT: Open an HTML file with automatic charset detection
#---------------------------------------------------------------------

use 5.008;
use strict;
use warnings;

use Carp 'croak';
use Encode 2.10 qw(decode find_encoding); # need utf-8-strict encoding
use Exporter 5.57 'import';

our $VERSION = '1.004';
# This file is part of IO-HTML 1.004 (September 26, 2020)

# Both globals use ||= so a value assigned before this file is loaded
# is kept.
our $bytes_to_check   ||= 1024;
our $default_encoding ||= 'cp1252';

our @EXPORT    = qw(html_file);
our @EXPORT_OK = qw(find_charset_in html_file_and_encoding html_outfile
                    sniff_encoding);

our %EXPORT_TAGS = (
  rw  => [qw( html_file html_file_and_encoding html_outfile )],
  all => [ @EXPORT, @EXPORT_OK ],
);

#=====================================================================
# html_file($filename, \%options)
# Open $filename for reading with the detected encoding applied and
# return only the filehandle.  The &name form (no parentheses) calls
# html_file_and_encoding with this sub's own @_.
sub html_file
{
  (&html_file_and_encoding)[0]; # return just the filehandle
} # end html_file

# Note: I made html_file and html_file_and_encoding separate functions
# (instead of making html_file context-sensitive) because I wanted to
# use html_file in function calls (i.e. list context) without having
# to write "scalar html_file" all the time.

# html_file_and_encoding($filename, \%options)
# Returns ($filehandle, $encoding, $bom).  $encoding is an encoding
# object when $options->{encoding} is true, otherwise a name.
# Croaks if the file cannot be opened, or if no encoding was sniffed
# and $default_encoding is undef.
sub html_file_and_encoding
{
  my ($filename, $options) = @_;

  $options ||= {};

  # Open raw first: the sniffer needs to look at undecoded bytes.
  open(my $in, '<:raw', $filename) or croak "Failed to open $filename: $!";

  my ($encoding, $bom) = sniff_encoding($in, $filename, $options);

  if (not defined $encoding) {
    croak "No default encoding specified"
        unless defined($encoding = $default_encoding);
    # The default is stored as a name; upgrade it to an object only
    # when the caller asked for objects.
    $encoding = find_encoding($encoding) if $options->{encoding};
  } # end if we didn't find an encoding

  binmode $in, sprintf(":encoding(%s):crlf",
                       $options->{encoding} ? $encoding->name : $encoding);

  return ($in, $encoding, $bom);
} # end html_file_and_encoding
#---------------------------------------------------------------------

# html_outfile($filename, $encoding, $bom)
# Open $filename for writing with an :encoding layer and return the
# handle.  $encoding may be a name or an object with a ->name method;
# undef falls back to $default_encoding.  Writes U+FEFF first when
# $bom is true.
sub html_outfile
{
  my ($filename, $encoding, $bom) = @_;

  if (not defined $encoding) {
    croak "No default encoding specified"
        unless defined($encoding = $default_encoding);
  } # end if we didn't find an encoding
  elsif (ref $encoding) {
    $encoding = $encoding->name;
  }

  open(my $out, ">:encoding($encoding)", $filename)
      or croak "Failed to open $filename: $!";

  print $out "\x{FeFF}" if $bom;

  return $out;
} # end html_outfile
#---------------------------------------------------------------------

# sniff_encoding($filehandle, $filename, \%options)
# Examine up to $bytes_to_check bytes from the handle's current
# position.  Returns ($encoding, $bom) in list context and just
# $encoding in scalar context; $encoding is undef when nothing could
# be determined.  $filename is used only in error messages.
sub sniff_encoding
{
  my ($in, $filename, $options) = @_;

  $filename = 'file' unless defined $filename;
  $options ||= {};

  # Remember where we started so the handle can be put back afterwards.
  my $pos = tell $in;
  croak "Could not seek $filename: $!" if $pos < 0;

  croak "Could not read $filename: $!"
      unless defined read $in, my($buf), $bytes_to_check;

  seek $in, $pos, 0 or croak "Could not seek $filename: $!";


  # Check for BOM:
  # ($bom temporarily holds the BOM's length in bytes)
  my $bom;
  my $encoding = do {
    if ($buf =~ /^\xFe\xFF/) {
      $bom = 2;
      'UTF-16BE';
    } elsif ($buf =~ /^\xFF\xFe/) {
      $bom = 2;
      'UTF-16LE';
    } elsif ($buf =~ /^\xEF\xBB\xBF/) {
      $bom = 3;
      'utf-8-strict';
    } else {
      find_charset_in($buf, $options); # check for <meta charset>
    }
  }; # end $encoding

  if ($bom) {
    # Skip forward over the BOM (whence 1 = relative to the current
    # position), then collapse $bom to a plain true flag.
    seek $in, $bom, 1 or croak "Could not seek $filename: $!";
    $bom = 1;
  }
  elsif (not defined $encoding) { # try decoding as UTF-8
    # The match below inspects what is left in $buf after decode();
    # presumably FB_QUIET leaves the undecoded remainder there --
    # confirm against the Encode documentation.
    my $test = decode('utf-8-strict', $buf, Encode::FB_QUIET);
    if ($buf =~ /^(?:                   # nothing left over
         | [\xC2-\xDF]                  # incomplete 2-byte char
         | [\xE0-\xEF] [\x80-\xBF]?     # incomplete 3-byte char
         | [\xF0-\xF4] [\x80-\xBF]{0,2} # incomplete 4-byte char
        )\z/x and $test =~ /[^\x00-\x7F]/) {
      $encoding = 'utf-8-strict';
    } # end if valid UTF-8 with at least one multi-byte character:
  } # end if testing for UTF-8

  if (defined $encoding and $options->{encoding} and not ref $encoding) {
    $encoding = find_encoding($encoding);
  } # end if $encoding is a string and we want an object

  return wantarray ? ($encoding, $bom) : $encoding;
} # end sniff_encoding

#=====================================================================
# Based on HTML5 8.2.2.2 Determining the character encoding:

# Get attribute from current position of $_
# Works on $_ and its pos(); every match uses \G with /gc, so a failed
# match leaves the position where it was.  Returns the empty list at
# the closing ">" or on an unterminated quoted value; otherwise
# (lowercased name, lowercased value) in list context, or 1 in scalar
# context.
sub _get_attribute
{
  m!\G[\x09\x0A\x0C\x0D /]+!gc; # skip whitespace or /

  return if /\G>/gc or not /\G(=?[^\x09\x0A\x0C\x0D =]*)/gc;

  my ($name, $value) = (lc $1, '');

  if (/\G[\x09\x0A\x0C\x0D ]*=[\x09\x0A\x0C\x0D ]*/gc) {
    if (/\G"/gc) {
      # Double-quoted attribute value
      /\G([^"]*)("?)/gc;
      return unless $2; # Incomplete attribute (missing closing quote)
      $value = lc $1;
    } elsif (/\G'/gc) {
      # Single-quoted attribute value
      /\G([^']*)('?)/gc;
      return unless $2; # Incomplete attribute (missing closing quote)
      $value = lc $1;
    } else {
      # Unquoted attribute value
      /\G([^\x09\x0A\x0C\x0D >]*)/gc;
      $value = lc $1;
    }
  } # end if attribute has value

  return wantarray ? ($name, $value) : 1;
} # end _get_attribute

# Examine a meta value for a charset:
# Looks for "charset" followed by "=" and returns the value after it
# (quoted or bare); undef when no such pair is found.
sub _get_charset_from_meta
{
  for (shift) {
    while (/charset[\x09\x0A\x0C\x0D ]*=[\x09\x0A\x0C\x0D ]*/ig) {
      return $1 if (/\G"([^"]*)"/gc or
                    /\G'([^']*)'/gc or
                    /\G(?!['"])([^\x09\x0A\x0C\x0D ;]+)/gc);
    }
  } # end for value

  return undef;
} # end _get_charset_from_meta
#---------------------------------------------------------------------

# find_charset_in($string, \%options)
# Scan the beginning of an HTML string for a <meta> tag that declares a
# charset.  Returns the encoding name, or the encoding object when
# $options->{encoding} is true; undef when no usable charset is found.
sub find_charset_in
{
  # The first shift aliases $_ to the HTML string; the second shift
  # (inside the loop) then picks up the options hashref.
  for (shift) {
    my $options = shift || {};
    # search only the first $bytes_to_check bytes (default 1024)
    my $stop = length > $bytes_to_check ? $bytes_to_check : length;

    my $expect_pragma = (defined $options->{need_pragma}
                         ? $options->{need_pragma} : 1);

    pos() = 0;
    while (pos() < $stop) {
      if (/\G<!--.*?(?<=--)>/sgc) {
      } # Skip comment
      elsif (m!\G<meta(?=[\x09\x0A\x0C\x0D /])!gic) {
        # $need_pragma stays undef until a charset has been seen:
        #   0              = came from a charset attribute
        #   $expect_pragma = came from a content attribute
        my ($got_pragma, $need_pragma, $charset);

        # &_get_attribute (no parentheses) shares this sub's @_ and
        # continues from the current pos() of $_.
        while (my ($name, $value) = &_get_attribute) {
          if ($name eq 'http-equiv' and $value eq 'content-type') {
            $got_pragma = 1;
          } elsif ($name eq 'content' and not defined $charset) {
            $need_pragma = $expect_pragma
                if defined($charset = _get_charset_from_meta($value));
          } elsif ($name eq 'charset') {
            $charset = $value;
            $need_pragma = 0;
          }
        } # end while more attributes in this <meta> tag

        if (defined $need_pragma and (not $need_pragma or $got_pragma)) {
          # A utf-16 label found inside the markup is replaced by UTF-8.
          $charset = 'UTF-8'  if $charset =~ /^utf-?16/;
          $charset = 'cp1252' if $charset eq 'iso-8859-1'; # people lie
          if (my $encoding = find_encoding($charset)) {
            return $options->{encoding} ? $encoding : $encoding->name;
          } # end if charset is a recognized encoding
        } # end if found charset
      } # end elsif <meta
      elsif (m!\G</?[a-zA-Z][^\x09\x0A\x0C\x0D >]*!gc) {
        # Consume this tag's attributes so nothing inside them is
        # mistaken for markup.
        1 while &_get_attribute;
      } # end elsif some other tag
      elsif (m{\G<[!/?][^>]*}gc) { } # skip unwanted things
      elsif (m/\G</gc) { } # skip < that doesn't open anything we recognize

      # Advance to the next <:
      m/\G[^<]+/gc;
    } # end while not at search boundary
  } # end for string

  return undef; # Couldn't find a charset
} # end find_charset_in
#---------------------------------------------------------------------

# Shortcuts for people who don't like exported functions:
*file               = \&html_file;
*file_and_encoding  = \&html_file_and_encoding;
*outfile            = \&html_outfile;

#=====================================================================
# Package Return Value:

1;

__END__

=head1 NAME

IO::HTML - Open an HTML file with automatic charset detection

=head1 VERSION

This document describes version 1.004 of IO::HTML, released September 26, 2020.

=head1 SYNOPSIS use IO::HTML; # exports html_file by default use HTML::TreeBuilder; my $tree = HTML::TreeBuilder->new_from_file( html_file('foo.html') ); # Alternative interface: open(my $in, '<:raw', 'bar.html'); my $encoding = IO::HTML::sniff_encoding($in, 'bar.html'); =head1 DESCRIPTION IO::HTML provides an easy way to open a file containing HTML while automatically determining its encoding. It uses the HTML5 encoding sniffing algorithm specified in section 8.2.2.2 of the draft standard. The algorithm as implemented here is: =over =item 1. If the file begins with a byte order mark indicating UTF-16LE, UTF-16BE, or UTF-8, then that is the encoding. =item 2. If the first C<$bytes_to_check> bytes of the file contain a C<< <meta> >> tag that indicates the charset, and Encode recognizes the specified charset name, then that is the encoding. (This portion of the algorithm is implemented by C<find_charset_in>.) The C<< <meta> >> tag can be in one of two formats: <meta charset="..."> <meta http-equiv="Content-Type" content="...charset=..."> The search is case-insensitive, and the order of attributes within the tag is irrelevant. Any additional attributes of the tag are ignored. The first matching tag with a recognized encoding ends the search. =item 3. If the first C<$bytes_to_check> bytes of the file are valid UTF-8 (with at least 1 non-ASCII character), then the encoding is UTF-8. =item 4. If all else fails, use the default character encoding. The HTML5 standard suggests the default encoding should be locale dependent, but currently it is always C<cp1252> unless you set C<$IO::HTML::default_encoding> to a different value. Note: C<sniff_encoding> does not apply this step; only C<html_file> does that. =back =head1 SUBROUTINES =head2 html_file $filehandle = html_file($filename, \%options); This function (exported by default) is the primary entry point. 
It opens the file specified by C<$filename> for reading, uses C<sniff_encoding> to find a suitable encoding layer, and applies it. It also applies the C<:crlf> layer. If the file begins with a BOM, the filehandle is positioned just after the BOM. The optional second argument is a hashref containing options. The possible keys are described under C<find_charset_in>. If C<sniff_encoding> is unable to determine the encoding, it defaults to C<$IO::HTML::default_encoding>, which is set to C<cp1252> (a.k.a. Windows-1252) by default. According to the standard, the default should be locale dependent, but that is not currently implemented. It dies if the file cannot be opened, or if C<sniff_encoding> cannot determine the encoding and C<$IO::HTML::default_encoding> has been set to C<undef>. =head2 html_file_and_encoding ($filehandle, $encoding, $bom) = html_file_and_encoding($filename, \%options); This function (exported only by request) is just like C<html_file>, but returns more information. In addition to the filehandle, it returns the name of the encoding used, and a flag indicating whether a byte order mark was found (if C<$bom> is true, the file began with a BOM). This may be useful if you want to write the file out again (especially in conjunction with the C<html_outfile> function). The optional second argument is a hashref containing options. The possible keys are described under C<find_charset_in>. It dies if the file cannot be opened, or if C<sniff_encoding> cannot determine the encoding and C<$IO::HTML::default_encoding> has been set to C<undef>. The result of calling C<html_file_and_encoding> in scalar context is undefined (in the C sense of there is no guarantee what you'll get). =head2 html_outfile $filehandle = html_outfile($filename, $encoding, $bom); This function (exported only by request) opens C<$filename> for output using C<$encoding>, and writes a BOM to it if C<$bom> is true. If C<$encoding> is C<undef>, it defaults to C<$IO::HTML::default_encoding>. 
C<$encoding> may be either an encoding name or an Encode::Encoding object. It dies if the file cannot be opened, or if both C<$encoding> and C<$IO::HTML::default_encoding> are C<undef>. =head2 sniff_encoding ($encoding, $bom) = sniff_encoding($filehandle, $filename, \%options); This function (exported only by request) runs the HTML5 encoding sniffing algorithm on C<$filehandle> (which must be seekable, and should have been opened in C<:raw> mode). C<$filename> is used only for error messages (if there's a problem using the filehandle), and defaults to "file" if omitted. The optional third argument is a hashref containing options. The possible keys are described under C<find_charset_in>. It returns Perl's canonical name for the encoding, which is not necessarily the same as the MIME or IANA charset name. It returns C<undef> if the encoding cannot be determined. C<$bom> is true if the file began with a byte order mark. In scalar context, it returns only C<$encoding>. The filehandle's position is restored to its original position (normally the beginning of the file) unless C<$bom> is true. In that case, the position is immediately after the BOM. Tip: If you want to run C<sniff_encoding> on a file you've already loaded into a string, open an in-memory file on the string, and pass that handle: ($encoding, $bom) = do { open(my $fh, '<', \$string); sniff_encoding($fh) }; (This only makes sense if C<$string> contains bytes, not characters.) =head2 find_charset_in $encoding = find_charset_in($string_containing_HTML, \%options); This function (exported only by request) looks for charset information in a C<< <meta> >> tag in a possibly-incomplete HTML document using the "two step" algorithm specified by HTML5. It does not look for a BOM. The C<< <meta> >> tag must begin within the first C<$IO::HTML::bytes_to_check> bytes of the string. It returns Perl's canonical name for the encoding, which is not necessarily the same as the MIME or IANA charset name. 
It returns C<undef> if no charset is specified or if the specified charset is not recognized by the Encode module. The optional second argument is a hashref containing options. The following keys are recognized: =over =item C<encoding> If true, return the L<Encode::Encoding> object instead of its name. Defaults to false. =item C<need_pragma> If true (the default), follow the HTML5 spec and examine the C<content> attribute only of C<< <meta http-equiv="Content-Type" >>. If set to 0, relax the HTML5 spec, and look for "charset=" in the C<content> attribute of I<every> meta tag. =back =head1 EXPORTS By default, only C<html_file> is exported. Other functions may be exported on request. For people who prefer not to export functions, all functions beginning with C<html_> have an alias without that prefix (e.g. you can call C<IO::HTML::file(...)> instead of C<IO::HTML::html_file(...)>. These aliases are not exportable. =for Pod::Coverage file file_and_encoding outfile The following export tags are available: =over =item C<:all> All exportable functions. =item C<:rw> C<html_file>, C<html_file_and_encoding>, C<html_outfile>. =back =head1 SEE ALSO The HTML5 specification, section 8.2.2.2 Determining the character encoding: L<http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding> =head1 DIAGNOSTICS =over =item C<< Could not read %s: %s >> The specified file could not be read from for the reason specified by C<$!>. =item C<< Could not seek %s: %s >> The specified file could not be rewound for the reason specified by C<$!>. =item C<< Failed to open %s: %s >> The specified file could not be opened for reading for the reason specified by C<$!>. =item C<< No default encoding specified >> The C<sniff_encoding> algorithm didn't find an encoding to use, and you set C<$IO::HTML::default_encoding> to C<undef>. =back =head1 CONFIGURATION AND ENVIRONMENT There are two global variables that affect IO::HTML. 
If you need to change them, you should do so using C<local> if possible: my $file = do { # This file may define the charset later in the header local $IO::HTML::bytes_to_check = 4096; html_file(...); }; =over =item C<$bytes_to_check> This is the number of bytes that C<sniff_encoding> will read from the stream. It is also the number of bytes that C<find_charset_in> will search for a C<< <meta> >> tag containing charset information. It must be a positive integer. The HTML 5 specification recommends using the default value of 1024, but some pages do not follow the specification. =item C<$default_encoding> This is the encoding that C<html_file> and C<html_file_and_encoding> will use if no encoding can be detected by C<sniff_encoding>. The default value is C<cp1252> (a.k.a. Windows-1252). Setting it to C<undef> will cause the file subroutines to croak if C<sniff_encoding> fails to determine the encoding. (C<sniff_encoding> itself does not use C<$default_encoding>). =back =head1 DEPENDENCIES IO::HTML has no non-core dependencies for Perl 5.8.7+. With earlier versions of Perl 5.8, you need to upgrade L<Encode> to at least version 2.10, and you may need to upgrade L<Exporter> to at least version 5.57. =head1 INCOMPATIBILITIES None reported. =head1 BUGS AND LIMITATIONS No bugs have been reported. =head1 AUTHOR Christopher J. Madsen S<C<< <perl AT cjmweb.net> >>> Please report any bugs or feature requests to S<C<< <bug-IO-HTML AT rt.cpan.org> >>> or through the web interface at L<< http://rt.cpan.org/Public/Bug/Report.html?Queue=IO-HTML >>. You can follow or contribute to IO-HTML's development at L<< https://github.com/madsen/io-html >>. =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2020 by Christopher J. Madsen. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. 
=head1 DISCLAIMER OF WARRANTY

BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR, OR CORRECTION.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

=cut

# Scalar.pm 0000444 00000035264 15160667122 0006325 0 ustar 00
# (the line above is the tar member header that precedes this module
# in the archive dump; it is kept as a comment so it cannot be parsed
# as code)
package IO::Scalar;

use strict;

use Carp;
use IO::Handle;

### Stringification, courtesy of B. K. Oxley (binkley):  :-)
use overload '""'   => sub { ${*{$_[0]}->{SR}} };
use overload 'bool' => sub { 1 };      ### have to do this, so object is true!

### The package version, both in 1.23 style *and* usable by MakeMaker:
our $VERSION = '2.113';

### Inheritance:
our @ISA = qw(IO::Handle);

### This stuff should be got rid of ASAP.
require IO::WrapTie and push @ISA, 'IO::WrapTie::Slave' if ($] >= 5.004);

#==============================

=head1 NAME

IO::Scalar - IO:: interface for reading/writing a scalar


=head1 SYNOPSIS

Perform I/O on strings, using the basic OO interface...

    use 5.005;
    use IO::Scalar;
    $data = "My message:\n";

    ### Open a handle on a string, and append to it:
    $SH = new IO::Scalar \$data;
    $SH->print("Hello");
    $SH->print(", world!\nBye now!\n");
    print "The string is now: ", $data, "\n";

    ### Open a handle on a string, read it line-by-line, then close it:
    $SH = new IO::Scalar \$data;
    while (defined($_ = $SH->getline)) {
        print "Got line: $_";
    }
    $SH->close;

    ### Open a handle on a string, and slurp in all the lines:
    $SH = new IO::Scalar \$data;
    print "All lines:\n", $SH->getlines;

    ### Get the current position (either of two ways):
    $pos = $SH->getpos;
    $offset = $SH->tell;

    ### Set the current position (either of two ways):
    $SH->setpos($pos);
    $SH->seek($offset, 0);

    ### Open an anonymous temporary scalar:
    $SH = new IO::Scalar;
    $SH->print("Hi there!");
    print "I printed: ", ${$SH->sref}, "\n";      ### get at value


Don't like OO for your I/O?  No problem.
Thanks to the magic of an invisible tie(), the following now
works out of the box, just as it does with IO::Handle:

    use 5.005;
    use IO::Scalar;
    $data = "My message:\n";

    ### Open a handle on a string, and append to it:
    $SH = new IO::Scalar \$data;
    print $SH "Hello";
    print $SH ", world!\nBye now!\n";
    print "The string is now: ", $data, "\n";

    ### Open a handle on a string, read it line-by-line, then close it:
    $SH = new IO::Scalar \$data;
    while (<$SH>) {
        print "Got line: $_";
    }
    close $SH;

    ### Open a handle on a string, and slurp in all the lines:
    $SH = new IO::Scalar \$data;
    print "All lines:\n", <$SH>;

    ### Get the current position (WARNING: requires 5.6):
    $offset = tell $SH;

    ### Set the current position (WARNING: requires 5.6):
    seek $SH, $offset, 0;

    ### Open an anonymous temporary scalar:
    $SH = new IO::Scalar;
    print $SH "Hi there!";
    print "I printed: ", ${$SH->sref}, "\n";      ### get at value


And for you folks with 1.x code out there: the old tie() style still works,
though this is I<unnecessary and deprecated>:

    use IO::Scalar;

    ### Writing to a scalar...
    my $s;
    tie *OUT, 'IO::Scalar', \$s;
    print OUT "line 1\nline 2\n", "line 3\n";
    print "String is now: $s\n";

    ### Reading and writing an anonymous scalar...
    tie *OUT, 'IO::Scalar';
    print OUT "line 1\nline 2\n", "line 3\n";
    tied(OUT)->seek(0,0);
    while (<OUT>) {
        print "Got line: ", $_;
    }


Stringification works, too!

    my $SH = new IO::Scalar \$data;
    print $SH "Hello, ";
    print $SH "world!";
    print "I printed: $SH\n";



=head1 DESCRIPTION

This class is part of the IO::Stringy distribution;
see L<IO::Stringy> for change log and general information.

The IO::Scalar class implements objects which behave just like
IO::Handle (or FileHandle) objects, except that you may use them
to write to (or read from) scalars.  These handles are
automatically C<tiehandle>d (though please see L<"WARNINGS">
for information relevant to your Perl version).


Basically, this:

    my $s;
    $SH = new IO::Scalar \$s;
    $SH->print("Hel", "lo, ");         ### OO style
    $SH->print("world!\n");            ### ditto

Or this:

    my $s;
    $SH = tie *OUT, 'IO::Scalar', \$s;
    print OUT "Hel", "lo, ";           ### non-OO style
    print OUT "world!\n";              ### ditto

Causes $s to be set to:

    "Hello, world!\n"


=head1 PUBLIC INTERFACE

=head2 Construction

=over 4

=cut

#------------------------------

=item new [ARGS...]

I<Class method.>
Return a new, unattached scalar handle.
If any arguments are given, they're sent to open().

=cut

sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    ### The object is a blessed reference to an anonymous glob; the glob's
    ### hash slot holds the handle state, and the glob is tied to the
    ### object itself so that builtin I/O operators dispatch to it.
    my $self = bless \do { local *FH }, $class;
    tie *$self, $class, $self;
    $self->open(@_);   ### open on anonymous by default
    $self;
}

### Destructor: close the handle when the object goes away.
sub DESTROY {
    shift->close;
}

#------------------------------

=item open [SCALARREF]

I<Instance method.>
Open the scalar handle on a new scalar, pointed to by SCALARREF.
If no SCALARREF is given, a "private" scalar is created to hold
the file data.

Returns the self object on success, undefined on error.
=cut

sub open {
    my ($self, $sref) = @_;

    ### Sanity:
    defined($sref) or do {my $s = ''; $sref = \$s};
    (ref($sref) eq "SCALAR") or croak "open() needs a ref to a scalar";

    ### Setup:
    *$self->{Pos} = 0;          ### seek position
    *$self->{SR}  = $sref;      ### scalar reference
    $self;
}

#------------------------------

=item opened

I<Instance method.>
Is the scalar handle opened on something?

=cut

sub opened {
    *{shift()}->{SR};
}

#------------------------------

=item close

I<Instance method.>
Disassociate the scalar handle from its underlying scalar.
Done automatically on destroy.

=cut

sub close {
    my $self = shift;
    %{*$self} = ();     ### drop all handle state (position and scalar ref)
    1;
}

=back

=cut



#==============================

=head2 Input and output

=over 4

=cut


#------------------------------

=item flush

I<Instance method.>
No-op, provided for OO compatibility.

=cut

sub flush { "0 but true" }

#------------------------------

=item fileno

I<Instance method.>
No-op, returns undef

=cut

sub fileno { }

#------------------------------

=item getc

I<Instance method.>
Return the next character, or undef if none remain.

=cut

sub getc {
    my $self = shift;

    ### Return undef right away if at EOF; else, move pos forward:
    return undef if $self->eof;
    substr(${*$self->{SR}}, *$self->{Pos}++, 1);
}

#------------------------------

=item getline

I<Instance method.>
Return the next line, or undef on end of string.
Can safely be called in an array context.
Currently, lines are delimited by "\n".

=cut

sub getline {
    my $self = shift;

    ### Return undef right away if at EOF:
    return undef if $self->eof;

    ### Get next line:
    my $sr = *$self->{SR};
    my $i  = *$self->{Pos};            ### Start matching at this point.

    ### Minimal impact implementation!
    ### We do the fast thing (no regexps) if using the
    ### classic input record separator.

    ### Case 1: $/ is undef: slurp all...
    if    (!defined($/)) {
        *$self->{Pos} = length $$sr;
        return substr($$sr, $i);
    }

    ### Case 2: $/ is "\n": zoom zoom zoom...
    elsif ($/ eq "\012") {

        ### Let index() locate the next "\n" at or after the current
        ### position, rather than testing one character at a time:
        my $nl = index($$sr, "\012", $i);

        ### Extract the line:
        my $line;
        if ($nl >= 0) {                ### We found a "\n":
            $line = substr($$sr, $i, $nl - $i + 1);
            *$self->{Pos} = $nl + 1;   ### Remember where we finished up.
        }
        else {                         ### No "\n"; slurp the remainder:
            $line = substr($$sr, $i);
            *$self->{Pos} = length($$sr);
        }
        return $line;
    }

    ### Case 3: $/ is ref to int. Do fixed-size records.
    ###        (Thanks to Dominique Quatravaux.)
    elsif (ref($/)) {
        my $len    = length($$sr);
        my $reclen = ${$/} + 0;
        my $line   = substr($$sr, *$self->{Pos}, $reclen);
        *$self->{Pos} += $reclen;
        *$self->{Pos} = $len if (*$self->{Pos} > $len);
        return $line;
    }

    ### Case 4: $/ is either "" (paragraphs) or something weird...
    ###         This is Graham's general-purpose stuff, which might be
    ###         a tad slower than Case 2 for typical data, because
    ###         of the regexps.
    else {
        pos($$sr) = $i;

        ### If in paragraph mode, skip leading lines (and update i!):
        length($/) or
            (($$sr =~ m/\G\n*/g) and ($i = pos($$sr)));

        ### If we see the separator in the buffer ahead...
        if (length($/)
            ?  $$sr =~ m,\Q$/\E,g          ###   (ordinary sep) TBD: precomp!
            :  $$sr =~ m,\n\n,g            ###   (a paragraph)
            ) {
            *$self->{Pos} = pos $$sr;
            return substr($$sr, $i, *$self->{Pos}-$i);
        }
        ### Else if no separator remains, just slurp the rest:
        else {
            *$self->{Pos} = length $$sr;
            return substr($$sr, $i);
        }
    }
}

#------------------------------

=item getlines

I<Instance method.>
Get all remaining lines.
It will croak() if accidentally called in a scalar context.

=cut

sub getlines {
    my $self = shift;
    wantarray or croak("can't call getlines in scalar context!");
    my ($line, @lines);
    push @lines, $line while (defined($line = $self->getline));
    @lines;
}

#------------------------------

=item print ARGS...

I<Instance method.>
Print ARGS to the underlying scalar.
B<Warning:> this continues to always cause a seek to the end
of the string, but if you perform seek()s and tell()s, it is
still safer to explicitly seek-to-end before subsequent print()s.

=cut

sub print {
    my $self = shift;
    *$self->{Pos} = length(${*$self->{SR}} .= join('', @_) . (defined($\) ? $\ : ""));
    1;
}

### Private variant of print(): appends, but only advances the position
### by the number of characters appended instead of moving it to the end.
sub _unsafe_print {
    my $self = shift;
    ### $\ is undefined unless the caller set it; treat that as "no separator":
    my $append = join('', @_) . (defined($\) ? $\ : "");
    ${*$self->{SR}} .= $append;
    *$self->{Pos}   += length($append);
    1;
}

### Private variant of print(): appends, then moves the position to the end.
sub _old_print {
    my $self = shift;
    ${*$self->{SR}} .= join('', @_) . (defined($\) ? $\ : "");
    *$self->{Pos} = length(${*$self->{SR}});
    1;
}


#------------------------------

=item read BUF, NBYTES, [OFFSET]

I<Instance method.>
Read some bytes from the scalar.
Returns the number of bytes actually read, 0 on end-of-file, undef on error.

=cut

sub read {
    my $self = $_[0];
    my $n    = $_[2];
    my $off  = $_[3] || 0;

    my $read = substr(${*$self->{SR}}, *$self->{Pos}, $n);
    $n = length($read);
    *$self->{Pos} += $n;

    ### BUF is $_[1], an alias to the caller's variable, so it is
    ### assigned in place rather than through a copy.
    if ($off) {
        ### Like the builtin read(), pad BUF with "\0" when a positive
        ### OFFSET lies beyond its current end; a 4-arg lvalue substr()
        ### outside the string would otherwise be a fatal error.
        $_[1] = '' unless defined($_[1]);
        $_[1] .= "\0" x ($off - length($_[1])) if ($off > length($_[1]));
        substr($_[1], $off) = $read;
    }
    else {
        $_[1] = $read;
    }
    return $n;
}

#------------------------------

=item write BUF, NBYTES, [OFFSET]

I<Instance method.>
Write some bytes to the scalar.

=cut

sub write {
    my $self = $_[0];
    my $n    = $_[2];
    my $off  = $_[3] || 0;

    my $data = substr($_[1], $off, $n);
    $n = length($data);
    $self->print($data);
    return $n;
}

#------------------------------

=item sysread BUF, LEN, [OFFSET]

I<Instance method.>
Read some bytes from the scalar.
Returns the number of bytes actually read, 0 on end-of-file, undef on error.

=cut

sub sysread {
  my $self = shift;
  $self->read(@_);
}

#------------------------------

=item syswrite BUF, NBYTES, [OFFSET]

I<Instance method.>
Write some bytes to the scalar.

=cut

sub syswrite {
  my $self = shift;
  $self->write(@_);
}

=back

=cut


#==============================

=head2 Seeking/telling and other attributes

=over 4

=cut


#------------------------------

=item autoflush

I<Instance method.>
No-op, provided for OO compatibility.
=cut

sub autoflush {}

#------------------------------

=item binmode

I<Instance method.>
No-op, provided for OO compatibility.

=cut

sub binmode {}

#------------------------------

=item clearerr

I<Instance method.>  Clear the error and EOF flags.  A no-op.

=cut

sub clearerr { 1 }

#------------------------------

=item eof

I<Instance method.>  Are we at end of file?

=cut

sub eof {
    my $self = shift;
    (*$self->{Pos} >= length(${*$self->{SR}}));
}

#------------------------------

=item seek OFFSET, WHENCE

I<Instance method.>  Seek to a given position in the stream.

=cut

sub seek {
    my ($self, $pos, $whence) = @_;
    my $eofpos = length(${*$self->{SR}});

    ### An omitted WHENCE has always compared equal to 0 (SEEK_SET);
    ### make that explicit so the numeric tests below see a defined value:
    $whence = 0 unless defined($whence);

    ### Seek:
    if    ($whence == 0) { *$self->{Pos} = $pos }             ### SEEK_SET
    elsif ($whence == 1) { *$self->{Pos} += $pos }            ### SEEK_CUR
    elsif ($whence == 2) { *$self->{Pos} = $eofpos + $pos}    ### SEEK_END
    else                 { croak "bad seek whence ($whence)" }

    ### Fixup: keep the position inside the string.
    if (*$self->{Pos} < 0)       { *$self->{Pos} = 0 }
    if (*$self->{Pos} > $eofpos) { *$self->{Pos} = $eofpos }
    return 1;
}

#------------------------------

=item sysseek OFFSET, WHENCE

I<Instance method.> Identical to C<seek OFFSET, WHENCE>, I<q.v.>

=cut

sub sysseek {
    my $self = shift;
    $self->seek (@_);
}

#------------------------------

=item tell

I<Instance method.>
Return the current position in the stream, as a numeric offset.

=cut

sub tell { *{shift()}->{Pos} }

#------------------------------
#
# use_RS [YESNO]
#
# I<Instance method.>
# Obey the current setting of $/, like IO::Handle does?
# Default is false in 1.x, but cold-welded true in 2.x and later.
#
sub use_RS {
    my ($self, $yesno) = @_;
    carp "use_RS is deprecated and ignored; \$/ is always consulted\n";
 }

#------------------------------

=item setpos POS

I<Instance method.>
Set the current position, using the opaque value returned by C<getpos()>.

=cut

sub setpos { shift->seek($_[0],0) }

#------------------------------

=item getpos

I<Instance method.>
Return the current position in the string, as an opaque object.
=cut

*getpos = \&tell;


#------------------------------

=item sref

I<Instance method.>
Return a reference to the underlying scalar.

=cut

sub sref { *{shift()}->{SR} }


#------------------------------
# Tied handle methods...
#------------------------------

# Conventional tiehandle interface:
sub TIEHANDLE {
    ### When handed an existing IO::Scalar object, tie to it directly;
    ### otherwise construct a new handle from the arguments.
    ((defined($_[1]) && UNIVERSAL::isa($_[1], "IO::Scalar"))
     ? $_[1]
     : shift->new(@_));
}
sub GETC      { shift->getc(@_) }
sub PRINT     { shift->print(@_) }
sub PRINTF    { shift->print(sprintf(shift, @_)) }
sub READ      { shift->read(@_) }
sub READLINE  { wantarray ? shift->getlines(@_) : shift->getline(@_) }
sub WRITE     { shift->write(@_); }
sub CLOSE     { shift->close(@_); }
sub SEEK      { shift->seek(@_); }
sub TELL      { shift->tell(@_); }
sub EOF       { shift->eof(@_); }
sub BINMODE   { 1; }

#------------------------------------------------------------

1;

__END__



=back

=cut


=head1 AUTHOR

Eryq (F<eryq@zeegee.com>).
President, ZeeGee Software Inc (F<http://www.zeegee.com>).

=head1 CONTRIBUTORS

Dianne Skoll (F<dfs@roaringpenguin.com>).

=head1 COPYRIGHT & LICENSE

Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

# AtomicFile.pm 0000444 00000012372 15160667123 0007130 0 ustar 00
# (the line above is the tar member header that precedes this module
# in the archive dump; it is kept as a comment so it cannot be parsed
# as code)
package IO::AtomicFile;

use strict;
use warnings;
use parent 'IO::File';

our $VERSION = '2.113';

#------------------------------
# new ARGS...
#------------------------------
# Class method, constructor.
# Any arguments are sent to open().
#
sub new {
    my $class = shift;
    my $self = $class->SUPER::new();
    ${*$self}{'io_atomicfile_suffix'} = '';
    $self->open(@_) if @_;
    $self;
}

#------------------------------
# DESTROY
#------------------------------
# Destructor.
#
sub DESTROY {
    my $self = shift;

    ### Only finalize a handle that is still open.  A handle that was
    ### never opened (or whose open failed) has nothing to install, and
    ### closing it would only raise a spurious failure during cleanup.
    $self->close(1) if $self->opened;   ### like close, but raises fatal exception on failure
}

#------------------------------
# open PATH, MODE
#------------------------------
# Class/instance method.
#
sub open {
    my ($self, $path, $mode) = @_;
    ref($self) or $self = $self->new;    ### now we have an instance!

    ### Create tmp path, and remember this info:
    my $temp = "${path}..TMP" . ${*$self}{'io_atomicfile_suffix'};
    ${*$self}{'io_atomicfile_temp'} = $temp;
    ${*$self}{'io_atomicfile_path'} = $path;

    ### Open the file!  Returns filehandle on success, for use as a constructor:
    $self->SUPER::open($temp, $mode) or return undef;

    ### A handle that was closed earlier and is now reopened must be
    ### closeable again; otherwise the stale sentinel would make the
    ### next close() skip both the close and the rename.
    ${*$self}{'io_atomicfile_closed'} = 0;
    $self;
}

#------------------------------
# _closed [YESNO]
#------------------------------
# Instance method, private.
# Are we already closed?  Argument sets new value, returns previous one.
#
sub _closed {
    my $self = shift;
    my $oldval = ${*$self}{'io_atomicfile_closed'};
    ${*$self}{'io_atomicfile_closed'} = shift if @_;
    $oldval;
}

#------------------------------
# close
#------------------------------
# Instance method.
# Close the handle, and rename the temp file to its final name.
# With a true argument, failures die instead of returning undef.
#
sub close {
    my ($self, $die) = @_;
    unless ($self->_closed(1)) {             ### sentinel...
        if ($self->SUPER::close()) {
            rename(${*$self}{'io_atomicfile_temp'},
                   ${*$self}{'io_atomicfile_path'})
                or ($die ? die "close (rename) atomic file: $!\n" : return undef);
        }
        else {
            ($die ? die "close atomic file: $!\n" : return undef);
        }
    }
    1;
}

#------------------------------
# delete
#------------------------------
# Instance method.
# Close the handle, and delete the temp file.
#
sub delete {
    my $self = shift;
    unless ($self->_closed(1)) {             ### sentinel...
        $self->SUPER::close();
        return unlink(${*$self}{'io_atomicfile_temp'});
    }
    1;
}

#------------------------------
# detach
#------------------------------
# Instance method.
# Close the handle, but DO NOT delete the temp file.
#
sub detach {
    my $self = shift;
    ### _closed(1) marks the handle closed and returns the previous state,
    ### so the underlying close happens at most once.
    $self->SUPER::close() unless ($self->_closed(1));
    1;
}

#------------------------------
1;
__END__

=head1 NAME

IO::AtomicFile - write a file which is updated atomically

=head1 SYNOPSIS

    use strict;
    use warnings;
    use feature 'say';
    use IO::AtomicFile;

    # Write a temp file, and have it install itself when closed:
    my $fh = IO::AtomicFile->open("bar.dat", "w");
    $fh->say("Hello!");
    $fh->close || die "couldn't install atomic file: $!";

    # Write a temp file, but delete it before it gets installed:
    $fh = IO::AtomicFile->open("bar.dat", "w");
    $fh->say("Hello!");
    $fh->delete;

    # Write a temp file, but neither install it nor delete it:
    $fh = IO::AtomicFile->open("bar.dat", "w");
    $fh->say("Hello!");
    $fh->detach;

=head1 DESCRIPTION

This module is intended for people who need to update files
reliably in the face of unexpected program termination.

For example, you generally don't want to be halfway in the middle of
writing I</etc/passwd> and have your program terminate!  Even
the act of writing a single scalar to a filehandle is I<not> atomic.

But this module gives you true atomic updates, via C<rename>.
When you open a file I</foo/bar.dat> via this module, you are I<actually>
opening a temporary file I</foo/bar.dat..TMP>, and writing your
output there. The act of closing this file (either explicitly
via C<close>, or implicitly via the destruction of the object)
will cause C<rename> to be called... therefore, from the point
of view of the outside world, the file's contents are updated
in a single time quantum.

To ensure that problems do not go undetected, the C<close> method
done by the destructor will raise a fatal exception if the C<rename>
fails.  The explicit C<close> just returns C<undef>.

You can also decide at any point to trash the file you've been
building.

=head1 METHODS

L<IO::AtomicFile> inherits all methods from L<IO::File> and
implements the following new ones.
=head2 close

    $fh->close();

This method calls its parent L<IO::File/"close"> and then renames its temporary file
as the original file name.

=head2 delete

    $fh->delete();

This method calls its parent L<IO::File/"close"> and then deletes the temporary file.

=head2 detach

    $fh->detach();

This method calls its parent L<IO::File/"close">. Unlike L<IO::AtomicFile/"delete"> it
does not then delete the temporary file.

=head1 AUTHOR

Eryq (F<eryq@zeegee.com>).
President, ZeeGee Software Inc (F<http://www.zeegee.com>).

=head1 CONTRIBUTORS

Dianne Skoll (F<dfs@roaringpenguin.com>).

=head1 COPYRIGHT & LICENSE

Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

# Stringy.pm 0000444 00000003435 15160667123 0006553 0 ustar 00
# (the line above is the tar member header that precedes this module
# in the archive dump; it is kept as a comment so it cannot be parsed
# as code)
package IO::Stringy;

use strict;
use Exporter;

### This package carries only the distribution version and documentation.
our $VERSION = '2.113';

1;
__END__

=head1 NAME

IO-stringy - I/O on in-core objects like strings and arrays

=head1 SYNOPSIS

    use strict;
    use warnings;

    use IO::AtomicFile; # Write a file which is updated atomically
    use IO::InnerFile; # define a file inside another file
    use IO::Lines; # I/O handle to read/write to array of lines
    use IO::Scalar; # I/O handle to read/write to a string
    use IO::ScalarArray; # I/O handle to read/write to array of scalars
    use IO::Wrap; # Wrap old-style FHs in standard OO interface
    use IO::WrapTie; # Tie your handles & retain full OO interface

    # ...

=head1 DESCRIPTION

This toolkit primarily provides modules for performing both traditional
and object-oriented i/o on things I<other> than normal filehandles; in particular,
L<IO::Scalar|IO::Scalar>, L<IO::ScalarArray|IO::ScalarArray>,
and L<IO::Lines|IO::Lines>.

In the more-traditional IO::Handle front, we
have L<IO::AtomicFile|IO::AtomicFile>
which may be used to painlessly create files which are updated
atomically.
And in the "this-may-prove-useful" corner, we have L<IO::Wrap|IO::Wrap>,
whose exported wraphandle() function will clothe anything that's not
a blessed object in an IO::Handle-like wrapper... so you can just
use OO syntax and stop worrying about whether your function's caller
handed you a string, a globref, or a FileHandle.

=head1 AUTHOR

Eryq (F<eryq@zeegee.com>).
President, ZeeGee Software Inc (F<http://www.zeegee.com>).

=head1 CONTRIBUTORS

Dianne Skoll (F<dfs@roaringpenguin.com>).

=head1 COPYRIGHT & LICENSE

Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

# InnerFile.pm 0000444 00000015601 15160667123 0006765 0 ustar 00
# (the line above is the tar member header that precedes this module
# in the archive dump; it is kept as a comment so it cannot be parsed
# as code)
package IO::InnerFile;

use strict;
use warnings;
use Symbol;

our $VERSION = '2.113';

### Constructor: IO::InnerFile->new(FH, [START, [LENGTH]]).
### Returns a generated glob, blessed into the class and tied to a hash
### that records the outer handle and the window (START, LG) plus the
### current position inside it (CRPOS).
sub new {
    my ($class, $fh, $start, $lg) = @_;
    $start = 0 if (!$start or ($start < 0));
    $lg    = 0 if (!$lg    or ($lg    < 0));

    ### Create the underlying "object":
    my $state = {
        FH      => $fh,
        CRPOS   => 0,
        START   => $start,
        LG      => $lg,
    };

    ### Create a new filehandle tied to this object:
    $fh = gensym;
    tie(*$fh, $class, $state);
    return bless($fh, $class);
}

sub TIEHANDLE {
    my ($class, $data) = @_;
    return bless($data, $class);
}

sub DESTROY {
    my ($self) = @_;
    ### NOTE(review): ref() on a blessed object yields its class name, so
    ### this condition looks like it can never be true -- confirm intent.
    $self->close() if (ref($self) eq 'SCALAR');
}

### Accessors for the window.  Each one reaches the tied state hash
### through the glob that the object refers to.
sub set_length { tied(${$_[0]})->{LG} = $_[1]; }
sub get_length { tied(${$_[0]})->{LG}; }
sub add_length { tied(${$_[0]})->{LG} += $_[1]; }

sub set_start  { tied(${$_[0]})->{START} = $_[1]; }
sub get_start  { tied(${$_[0]})->{START}; }
sub add_start  { tied(${$_[0]})->{START} += $_[1]; }
sub set_end    { tied(${$_[0]})->{LG} = $_[1] - tied(${$_[0]})->{START}; }
sub get_end    { tied(${$_[0]})->{LG} + tied(${$_[0]})->{START}; }

### OO-style methods; these forward to the tied-handle implementation.
sub write    { shift->WRITE(@_) }
sub print    { shift->PRINT(@_) }
sub printf   { shift->PRINTF(@_) }
sub flush    { "0 but true"; }
sub fileno   { }
sub binmode  { 1; }
sub getc     { return GETC(tied(${$_[0]}) ); }
sub read     { return READ(     tied(${$_[0]}), @_[1,2,3] ); }
sub readline { return READLINE( tied(${$_[0]}) ); }
sub getline  { return READLINE( tied(${$_[0]}) ); }
sub close    { return CLOSE(tied(${$_[0]}) ); }

### Move the position inside the window; the result is clamped to
### the range 0 .. LG.
sub seek {
    my ($self, $ofs, $whence) = @_;
    $self = tied( $$self );

    $self->{CRPOS} = $ofs if ($whence == 0);
    $self->{CRPOS}+= $ofs if ($whence == 1);
    $self->{CRPOS} = $self->{LG} + $ofs if ($whence == 2);

    $self->{CRPOS} = 0 if ($self->{CRPOS} < 0);
    $self->{CRPOS} = $self->{LG} if ($self->{CRPOS} > $self->{LG});
    return 1;
}

sub tell {
    return tied(${$_[0]})->{CRPOS};
}

sub WRITE  {
    die "inner files can only open for reading\n";
}

sub PRINT  {
    die "inner files can only open for reading\n";
}

sub PRINTF {
    die "inner files can only open for reading\n";
}

### Read one character from the window, or undef when none remain.
sub GETC   {
    my ($self) = @_;

    ### At the end of the window there is no character to return.  The
    ### result must be undef here: a 0 would be indistinguishable from
    ### having read the character "0".
    return undef if ($self->{CRPOS} >= $self->{LG});

    my $data;

    ### Save and seek...
    my $old_pos = $self->{FH}->tell;
    $self->{FH}->seek($self->{CRPOS}+$self->{START}, 0);

    ### ...read...
    my $lg = $self->{FH}->read($data, 1);
    $self->{CRPOS} += $lg;

    ### ...and restore:
    $self->{FH}->seek($old_pos, 0);

    ### The outer file ended early: shrink the window to what exists.
    $self->{LG} = $self->{CRPOS} unless ($lg);
    return ($lg ? $data : undef);
}

### Read up to LEN characters into the caller's buffer ($_[1]), never
### going past the end of the window.  Returns the count read.
sub READ   {
    my ($self, undef, $lg) = @_;

    return 0 if ($self->{CRPOS} >= $self->{LG});
    $lg = $self->{LG} - $self->{CRPOS} if ($self->{CRPOS} + $lg > $self->{LG});
    return 0 unless ($lg);

    ### Save and seek...
    my $old_pos = $self->{FH}->tell;
    $self->{FH}->seek($self->{CRPOS}+$self->{START}, 0);

    ### ...read...
    $lg = $self->{FH}->read($_[1], $lg, $_[3] );
    $self->{CRPOS} += $lg;

    ### ...and restore:
    $self->{FH}->seek($old_pos, 0);

    $self->{LG} = $self->{CRPOS} unless ($lg);
    return $lg;
}

### One line in scalar context, all remaining lines in list context.
sub READLINE {
    my ($self) = @_;
    return $self->_readline_helper() unless wantarray;
    my @arr;
    while(defined(my $line = $self->_readline_helper())) {
        push(@arr, $line);
    }
    return @arr;
}

### Read a single line (or the rest of the window in slurp mode),
### truncated at the end of the window.
sub _readline_helper {
    my ($self) = @_;
    return undef if ($self->{CRPOS} >= $self->{LG});

    # Handle slurp mode (CPAN ticket #72710)
    if (! defined($/)) {
        my $text;
        $self->READ($text, $self->{LG} - $self->{CRPOS});
        return $text;
    }

    ### Save and seek...
    my $old_pos = $self->{FH}->tell;
    $self->{FH}->seek($self->{CRPOS}+$self->{START}, 0);

    ### ...read...
    my $text = $self->{FH}->getline;

    ### ...and restore:
    $self->{FH}->seek($old_pos, 0);

    #### If we detected a new EOF ...
    unless (defined $text) {
        $self->{LG} = $self->{CRPOS};
        return undef;
    }

    my $lg=length($text);

    $lg = $self->{LG} - $self->{CRPOS} if ($self->{CRPOS} + $lg > $self->{LG});
    $self->{CRPOS} += $lg;

    return substr($text, 0,$lg);
}

### Forget the outer handle and the window.
sub CLOSE { %{$_[0]}=(); }

1;
__END__

=head1 NAME

IO::InnerFile - define a file inside another file

=head1 SYNOPSIS

    use strict;
    use warnings;
    use IO::InnerFile;

    # Read a subset of a file:
    my $fh = _some_file_handle;
    my $start = 10;
    my $length = 50;
    my $inner = IO::InnerFile->new($fh, $start, $length);
    while (my $line = <$inner>) {
        # ...
    }

=head1 DESCRIPTION

If you have a file handle that can C<seek> and C<tell>, then you
can open an L<IO::InnerFile> on a range of the underlying file.

=head1 CONSTRUCTORS

L<IO::InnerFile> implements the following constructors.

=head2 new

    my $inner = IO::InnerFile->new($fh);
    $inner = IO::InnerFile->new($fh, 10);
    $inner = IO::InnerFile->new($fh, 10, 50);

Create a new L<IO::InnerFile> opened on the given file handle.
The file handle supplied B<MUST> be able to both C<seek> and C<tell>.

The second and third parameters are start and length. Both are defaulted
to zero (C<0>). Negative values are silently coerced to zero.

=head1 METHODS

L<IO::InnerFile> implements the following methods.

=head2 add_length

    $inner->add_length(30);

Add to the virtual length of the inner file by the number given in bytes.

=head2 add_start

    $inner->add_start(30);

Add to the virtual position of the inner file by the number given in bytes.

=head2 binmode

    $inner->binmode();

This is a NOOP method just to satisfy the normal L<IO::File> interface.
=head2 close

=head2 fileno

    $inner->fileno();

This is a NOOP method just to satisfy the normal L<IO::File> interface.

=head2 flush

    $inner->flush();

This is a NOOP method just to satisfy the normal L<IO::File> interface.

=head2 get_end

    my $num_bytes = $inner->get_end();

Get the virtual end position of the inner file in bytes.

=head2 get_length

    my $num_bytes = $inner->get_length();

Get the virtual length of the inner file in bytes.

=head2 get_start

    my $num_bytes = $inner->get_start();

Get the virtual position of the inner file in bytes.

=head2 getc

=head2 getline

=head2 print LIST

=head2 printf

=head2 read

=head2 readline

=head2 seek

=head2 set_end

    $inner->set_end(30);

Set the virtual end of the inner file in bytes (this basically just alters the length).

=head2 set_length

    $inner->set_length(30);

Set the virtual length of the inner file in bytes.

=head2 set_start

    $inner->set_start(30);

Set the virtual start position of the inner file in bytes.

=head2 tell

=head2 write

=head1 AUTHOR

Eryq (F<eryq@zeegee.com>).
President, ZeeGee Software Inc (F<http://www.zeegee.com>).

=head1 CONTRIBUTORS

Dianne Skoll (F<dfs@roaringpenguin.com>).

=head1 COPYRIGHT & LICENSE

Copyright (c) 1997 Erik (Eryq) Dorfman, ZeeGee Software, Inc. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut
| ver. 1.4 |
Github
|
.
| PHP 8.1.34 | ���֧ߧ֧�ѧ�ڧ� ����ѧߧڧ��: 0 |
proxy
|
phpinfo
|
���ѧ����ۧܧ�