package SGX::CSV; use strict; use warnings; require File::Temp; require Text::CSV; use Benchmark qw/timediff timestr/; use SGX::Util qw/build_matchp car/; use SGX::Abstract::Exception (); #=== FUNCTION ================================================================ # NAME: bind_handle # PURPOSE: # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: ???? # THROWS: no exceptions # COMMENTS: # # :TODO:10/21/2011 00:49:20:es: Use Text::CSV types: # http://search.cpan.org/~makamaka/Text-CSV-1.21/lib/Text/CSV.pm#types # # SEE ALSO: n/a #=============================================================================== sub bind_handle { my $handle = shift; my $csv = eval { Text::CSV->new( { eol => "\r\n", sep_char => "," } ); } or do { my $exception = Exception::Class->caught(); my $msg = "$exception" || 'Unknown error'; SGX::Exception::Internal->throw( error => "Can't load Text::CSV: $msg" ); }; return sub { return $csv->print( $handle, shift || [] ); }; } #=== CLASS METHOD ============================================================ # CLASS: CSV # METHOD: # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub delegate_fileUpload { my %args = @_; my $self = $args{delegate}; my $sql = $args{statements}; my $validators = $args{validators}; my $parameters = $args{parameters}; my $index_for_count = $args{index_for_count}; my $filename = $args{filename}; # will use last return code by default (index -1) my $return_code_to_report = exists( $args{return_code_to_report} ) ? $args{return_code_to_report} : -1; my $success_message = exists( $args{success_message} ) ? $args{success_message} : 1; my $dbh = $self->{_dbh}; my @statements = map { $dbh->prepare($_) } @$sql; my $old_AutoCommit = $dbh->{AutoCommit}; $dbh->{AutoCommit} = 0; my $t0 = Benchmark->new(); my @return_codes; eval { foreach my $sth (@statements) { my $param = shift @$parameters; my @proc_param = map { ( ref($_) eq 'CODE' ) ? $_->() : $_ } @$param; push @return_codes, 0 + $sth->execute(@proc_param); $sth->finish(); if ( defined $validators ) { my $validator = shift @$validators; $validator->( \@return_codes ) if defined $validator; } } 1; } or do { my $exception; if ( $exception = Exception::Class->caught('Exception::Class::DBI::STH') ) { # catch dbi::sth exceptions. note: this block catches duplicate # key record exceptions. $self->add_message( {}, 'No changes to the database were stored' ); $self->add_message( { -class => 'error' }, sprintf( 'DBI Error %d: %s', $exception->err, $exception->errstr ) ); } elsif ( $exception = Exception::Class->caught() ) { my $msg = eval { $exception->error } || "$exception" || 'Unknown error'; $self->add_message( {}, 'No changes to the database were stored' ); $self->add_message( { -class => 'error' }, $msg ); } else { $self->add_message( {}, 'Unknown error occured when loading data into the database. No changes to the database were stored.' ); } $dbh->rollback; unlink $filename if defined $filename; $_->finish() for @statements; $dbh->{AutoCommit} = $old_AutoCommit; return; }; $dbh->commit; $_->finish() for @statements; $dbh->{AutoCommit} = $old_AutoCommit; my $t1 = Benchmark->new(); unlink $filename if defined $filename; my $records_added = $return_codes[$return_code_to_report]; my $time_diff = timestr( timediff( $t1, $t0 ) ); if ($success_message) { my $msg = "Success! Added $records_added entries to the database. The operation took $time_diff."; $self->add_message( { -class => 'success' }, $msg ); } return 1; } #=== CLASS METHOD ============================================================ # CLASS: CSV # METHOD: sanitizeUploadWithMessages # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub sanitizeUploadWithMessages { my ( $delegate, $inputField, %args ) = @_; my $q = $delegate->{_cgi}; # field separator character $args{csv_in_opts} ||= {}; $args{csv_in_opts}->{sep_char} = ( car( $q->param('separator') ) || "\t" ) if not defined $args{csv_in_opts}->{sep_char}; # whether first line is a header or not $args{header} = ( defined( $q->param('header') ) ? 1 : 0 ) if not defined $args{header}; my $recordsValid = 0; my $outputFileNames = eval { sanitizeUploadFile( $q, $inputField, \$recordsValid, %args ); } || []; my $exception; if ( $exception = Exception::Class->caught('SGX::Exception::User') ) { # Notify user of User exception; rethrow Internal and other types of # exceptions. $delegate->add_message( { -class => 'error' }, 'There was a problem with your input: ' . $exception->error ); } elsif ( $exception = Exception::Class->caught() ) { $exception->rethrow(); } elsif ( $recordsValid == 0 ) { $delegate->add_message( { -class => 'error' }, 'No valid records were uploaded.' ); } return ( $outputFileNames, $recordsValid ); } #=== CLASS METHOD ============================================================ # CLASS: CSV # METHOD: sanitizeUploadFile # PARAMETERS: $outputFileName - Name of the temporary file to write to # RETURNS: Number of valid records found # DESCRIPTION: validate and rewrite the uploaded file # THROWS: SGX::Exception::Internal, SGX::Exception::User # COMMENTS: # :TODO:07/08/2011 12:55:45:es: Make headers optional # SEE ALSO: n/a #=============================================================================== sub sanitizeUploadFile { my ( $q, $inputField, $recordsValid, %args ) = @_; # This is where we put the temp file we will import. UNLINK option to # File::Temp constructor means that the File::Temp destructor will try to # unlink the temporary file on its own (we don't need to worry about # unlinking). Because we are initializing an instance of File::Temp in the # namespace of this function, the temporary file will be deleted when the # function exists (when the reference to File::Temp will go out of context). my $rewrite = ( exists $args{rewrite} ) ? $args{rewrite} : 1; # The is the file handle of the uploaded file. my $uploadedFile = $q->upload($inputField) or SGX::Exception::User->throw( error => "Failed to upload file.\n" ); #Open file we are writing to server. my @outputFileName; my @OUTPUTTOSERVER; for ( my $i = 0 ; $i < $rewrite ; $i++ ) { my $outputFileName = File::Temp->new( SUFFIX => '.txt', UNLINK => 1 )->filename(); open my $OUTPUTTOSERVER, '>', $outputFileName ## no critic or SGX::Exception::Internal->throw( error => "Could not open $outputFileName for writing: $!\n" ); push @outputFileName, $outputFileName; push @OUTPUTTOSERVER, $OUTPUTTOSERVER; } $$recordsValid = eval { csv_rewrite( $uploadedFile, \@OUTPUTTOSERVER, %args ); } || 0; if ( my $exception = Exception::Class->caught() ) { # In case of error, close files first and rethrow the exception close($_) for @OUTPUTTOSERVER; if ( eval { $exception->can('rethrow') } ) { $exception->rethrow(); } else { # Internal error details do not diplay to browser window SGX::Exception::Internal->throw( error => "$exception\n" ); } } elsif ( $$recordsValid < 1 ) { SGX::Exception::User->throw( error => "No records found in input file\n" ); } return \@outputFileName; } #=== FUNCTION ================================================================ # NAME: csv_rewrite # PURPOSE: Validate user-uploaded microarray data and rewrite the file # provided into a new file for safe slurp-mode loading into the # database. The original idea was to input a tab-separated file, # validate it, and rewrite it as a pipe-separated file. # PARAMETERS: Required: # $in - ARRAY reference to input file contents split by lines # $out - output file handle # $parse - array of functions to validate input fields # # Optional (named) with default values: # header => 0 - whether input contains a header # csv_in_opts => {} - Input Text::CSV options, e.g. # csv_in_opts => { sep_char => "\t" } # csv_out_opts => {} - Output Text::CSV options, e.g. # csv_out_opts => { # eol => "\n", # sep_char => ',' # } # RETURNS: True value on success; raises exception on failure # DESCRIPTION: Takes in a CSV-formatted plain-text file (either with header # present or not) such that the first column is a string "key" # and the first n columns after it are numeric. Rewrites this # file while dropping the header. The following options should be # used for MySQL LOAD DATA INFILE statement: # # FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' # LINES TERMINATED BY '\n' STARTING BY '' # # THROWS: SGX::Exception::User # COMMENTS: n/a # # SEE ALSO: perldoc Text::CSV # http://search.cpan.org/~makamaka/Text-CSV-1.21/lib/Text/CSV.pm #=============================================================================== sub csv_rewrite { my ( $uploadedFile, $out, %args ) = @_; # Read uploaded file in "slurp" mode (at once), and break it on the # following combinations of line separators in respective order: (1) CRLF # (Windows), (2) LF (Unix), and (3) CR (Mac). my @lines = split( /\r\n|\n|\r/, do { local $/ = <$uploadedFile> } ); # upload file should get deleted automatically on close close $uploadedFile; # whether input file contains a header my $header = ( exists $args{header} ) ? $args{header} : 0; # default: no header # Default value for sep_char is as indicated below unless overridden by # $args{csv_out_opts}. Also, other Text::CSV options can be set through # $args{csv_out_opts}. my %csv_in_opts = ( sep_char => "\t", allow_whitespace => 1, %{ $args{csv_in_opts} || {} } ); # Default values for sep_char and eol are as indicated below unless # overridden by $args{csv_out_opts}. Also, other Text::CSV options can be # set through $args{csv_out_opts}. my %csv_out_opts = ( sep_char => ',', eol => "\n", %{ $args{csv_out_opts} || {} } ); my $csv_in = Text::CSV->new( \%csv_in_opts ); my $csv_out = Text::CSV->new( \%csv_out_opts ); my $record_num = 0; # record is a non-empty line # Generate a custom function that will check whether array consists of # elements we consider to be empty. When "allow_whitespace" is set during # parsing, white space will be stripped off automatically from value bounds # by Text::CSV, in which case we define "empty" to mean empty string. When # "allow_whitespace" is not set, we define "empty" to mean any combination # of space characters *or* an empty string. In addition, we ignore undefined # values (they are equivalent to empty fields). my $is_empty = build_matchp( ( $csv_in_opts{allow_whitespace} ? qr/^$/ : qr/^\s*$/ ), ignore_undef => 1 ); #--------------------------------------------------------------------------- # default process routine #--------------------------------------------------------------------------- my $parser = $args{parser} || []; my $min_field_count = $args{required_fields}; $min_field_count = @$parser if not defined $min_field_count; my @sub_print; foreach my $fh (@$out) { push @sub_print, sub { $csv_out->print( $fh, \@_ ) }; } my $process = $args{process} || sub { my $printfun = shift; my $line_num = shift; my $fields = shift; # check total number of fields present if ( @$fields < $min_field_count ) { SGX::Exception::User->throw( error => sprintf( "Only %d field(s) found (%d required) on line %d\n", scalar(@$fields), $min_field_count, $line_num ) ); } # perform validation on each column eval { $printfun->[0]->( map { ## no critic my $val = shift @$fields; ( defined $_ ) ? $_->( $val, $line_num ) : () } @$parser ); 1; } or do { my $exception; if ( $exception = Exception::Class->caught('SGX::Exception::Skip') ) { # only skip 'Skip' exception (which means: skip line) } elsif ( $exception = Exception::Class->caught() ) { if ( eval { $exception->can('rethrow') } ) { $exception->rethrow(); } else { # Internal error details do not diplay to browser window SGX::Exception::Internal->throw( error => "$exception\n" ); } } }; return 1; }; #--------------------------------------------------------------------------- # main loop #--------------------------------------------------------------------------- for ( my $line_num = 1 ; $csv_in->parse( shift @lines ) ; $line_num++ ) { my @fields = $csv_in->fields(); # skip blank lines next if $is_empty->(@fields); # skip header if requested to $record_num++; next if $record_num == 1 and $header; $process->( \@sub_print, $line_num, \@fields ); } # check for errors my ( $err_code, $err_string, $err_pos ) = $csv_in->error_diag(); if ($err_code) { SGX::Exception::User->throw( error => "Text::CSV error code $err_code ($err_string), position $err_pos. Records written: $record_num" ); } # return number of records written return $record_num; } 1; __END__ #=============================================================================== # # FILE: CSV.pm # # DESCRIPTION: Routines for handling CSV files # # FILES: --- # BUGS: --- # NOTES: # # AUTHOR: Eugene Scherba (es), escherba@gmail.com # COMPANY: Boston University # VERSION: 1.0 # CREATED: 07/09/2011 16:22:21 # REVISION: --- #===============================================================================