package SGX::UploadGO; use strict; use warnings; use base qw/SGX::Strategy::Base/; use SGX::Abstract::Exception (); use SGX::Config qw/%SEGEX_CONFIG/; #--------------------------------------------------------------------------- # Parse term.txt. GO schema: # # CREATE TABLE `term` ( # `id` int(11) NOT NULL AUTO_INCREMENT, # `name` varchar(255) NOT NULL DEFAULT '', # `term_type` varchar(55) NOT NULL, # `acc` varchar(255) NOT NULL, # `is_obsolete` int(11) NOT NULL DEFAULT '0', # `is_root` int(11) NOT NULL DEFAULT '0', # `is_relation` int(11) NOT NULL DEFAULT '0', # PRIMARY KEY (`id`), # UNIQUE KEY `acc` (`acc`), # UNIQUE KEY `t0` (`id`), # KEY `t1` (`name`), # KEY `t2` (`term_type`), # KEY `t3` (`acc`), # KEY `t4` (`id`,`acc`), # KEY `t5` (`id`,`name`), # KEY `t6` (`id`,`term_type`), # KEY `t7` (`id`,`acc`,`name`,`term_type`) # ) TYPE=MyISAM AUTO_INCREMENT=35385; #--------------------------------------------------------------------------- my @term_parser = ( # term id sub { if ( shift =~ /(\d+)/ ) { return $1 + 0; } else { SGX::Exception::User->throw( error => 'Term id not an unsigned integer on line ' . shift ); } }, # term name sub { my ($x) = shift =~ /(.*)/; return $x }, # term type sub { my ($x) = shift =~ /(.*)/; return $x }, # GO accession number sub { if ( shift =~ /^GO:(\d{7})$/ ) { return $1 + 0; } else { SGX::Exception::Skip->throw( error => 'Cannot parse GO term on line ' . shift ); } } ); #--------------------------------------------------------------------------- # Parse term_definition.txt. GO schema: # # CREATE TABLE `term_definition` ( # `term_id` int(11) NOT NULL, # `term_definition` text NOT NULL, # `dbxref_id` int(11) DEFAULT NULL, # `term_comment` mediumtext, # `reference` varchar(255) DEFAULT NULL, # UNIQUE KEY `term_id` (`term_id`), # KEY `dbxref_id` (`dbxref_id`), # KEY `td1` (`term_id`) # ) TYPE=MyISAM; #--------------------------------------------------------------------------- my @term_definition_parser = ( # term id sub { if ( shift =~ /(\d+)/ ) { return $1 + 0; } else { SGX::Exception::User->throw( error => 'Term id not an unsigned integer on line ' . shift ); } }, # term definition sub { my ($x) = shift =~ /(.*)/; return $x } ); #=== CLASS METHOD ============================================================ # CLASS: SGX::UploadGO # METHOD: init # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub init { my $self = shift; $self->SUPER::init(); $self->set_attributes( _permission_level => 'admin', _title => 'Upload GO Terms' ); $self->register_actions( 'Upload Terms' => { head => 'UploadTerms_head', body => 'UploadTerms_body' }, 'Upload Term Definitions' => { head => 'UploadTermDefs_head', body => 'UploadTermDefs_body' } ); return $self; } #=== CLASS METHOD ============================================================ # CLASS: SGX::UploadGO # METHOD: UploadTerms_head # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub UploadTerms_head { my $self = shift; require SGX::CSV; my ( $outputFileNames, $recordsValid ) = SGX::CSV::sanitizeUploadWithMessages( $self, 'file', parser => \@term_parser, csv_in_opts => { quote_char => undef }, ); my ($outputFileName) = @$outputFileNames; my $dbh = $self->{_dbh}; my $sth = $dbh->prepare(<<"END_loadTerms"); LOAD DATA LOCAL INFILE ? REPLACE INTO TABLE go_term FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES STARTING BY '' TERMINATED BY '\n' ( go_term_id, go_name, go_term_type, go_acc ) END_loadTerms my $recordsUpdated = $sth->execute($outputFileName); unlink $outputFileName; $self->add_message( "Success! Found $recordsValid valid entries; affected $recordsUpdated rows in the GO terms table." ); return 1; } #=== CLASS METHOD ============================================================ # CLASS: SGX::UploadGO # METHOD: UploadTermDefs_head # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub UploadTermDefs_head { my $self = shift; require SGX::CSV; my ( $outputFileNames, $recordsValid ) = SGX::CSV::sanitizeUploadWithMessages( $self, 'file', parser => \@term_definition_parser, csv_in_opts => { quote_char => undef }, ); my ($outputFileName) = @$outputFileNames; my $dbh = $self->{_dbh}; my $temp_table = time() . '_' . getppid(); my $sth_create_temp = $dbh->prepare(<<"END_loadTermDefs_createTemp"); CREATE TEMPORARY TABLE $temp_table ( go_term_id int(10) unsigned NOT NULL, go_term_definition text ) ENGINE=MyISAM END_loadTermDefs_createTemp my $sth = $dbh->prepare(<<"END_loadTermDefs"); LOAD DATA LOCAL INFILE ? INTO TABLE $temp_table FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES STARTING BY '' TERMINATED BY '\n' ( go_term_id, go_term_definition ) END_loadTermDefs my $sth_update = $dbh->prepare(<<"END_update"); UPDATE go_term INNER JOIN $temp_table USING(go_term_id) SET go_term.go_term_definition=$temp_table.go_term_definition END_update $sth_create_temp->execute(); my $recordsLoaded = $sth->execute($outputFileName); my $recordsUpdated = $sth_update->execute(); unlink $outputFileName; if ( $recordsLoaded != $recordsUpdated ) { $self->add_message( "Warning: Loaded $recordsLoaded records into temporary table but only $recordsUpdated records were updated" ); } $self->add_message( "Success! Found $recordsValid valid entries, updated $recordsUpdated GO term definitions." ); return 1; } #=== CLASS METHOD ============================================================ # CLASS: SGX::UploadGO # METHOD: UploadTerms_body # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub UploadTerms_body { my $self = shift; my $q = $self->{_cgi}; # show form to upload term definitions return $q->h2('Gene Ontology: Upload Term Definitions (File 2)'), $q->p(<<"END_info"), Now upload the file called term_definition.txt from the archive you downloaded and extracted from the GO webpage. If you skip this step, you can still use GO annotation, but your text searches will be limited to GO names. END_info $q->start_form( -accept_charset => 'ISO-8859-1', -method => 'POST', -enctype => 'multipart/form-data', -action => $q->url( absolute => 1 ) . '?a=uploadGO' ), $q->dl( $q->dt('Path to term_definition.txt:'), $q->dd( $q->filefield( -name => 'file', -title => 'File path to term.txt file containing GO term definitions' ) ), $q->dt(' '), $q->dd( $q->submit( -name => 'b', -value => 'Upload Term Definitions', -class => 'button black bigrounded' ) ) ), $q->end_form; } #=== CLASS METHOD ============================================================ # CLASS: SGX::UploadGO # METHOD: UploadTermDefs_body # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub UploadTermDefs_body { my $self = shift; my $q = $self->{_cgi}; return $q->p('You have successfully updated GO terms and definitions.'); } #=== CLASS METHOD ============================================================ # CLASS: SGX::UploadGO # METHOD: default_head # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub default_head { my $self = shift; my $dbh = $self->{_dbh}; my $sth = $dbh->prepare( 'select UPDATE_TIME from information_schema.tables where TABLE_SCHEMA=? and TABLE_NAME=?' ); my $rc = $sth->execute( $SEGEX_CONFIG{dbname}, 'go_term' ); my ($time) = $sth->fetchrow_array; $sth->finish; if ( defined $time ) { $self->add_message( "Segex GO term table was last updated on: $time $SEGEX_CONFIG{timezone}" ); } return 1; } #=== CLASS METHOD ============================================================ # CLASS: SGX::UploadGO # METHOD: default_body # PARAMETERS: ???? # RETURNS: ???? # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub default_body { my $self = shift; my $q = $self->{_cgi}; # here we will show form for updating GO term names and definitions return $q->h2('Gene Ontology: Upload Terms (File 1)'), $q->p( 'To update the gene ontology (GO) terms, follow these simple steps:', $q->ol( $q->li( 'Download MySQL version of "termdb" database from ' . $q->a( { -target => '_blank', -href => 'http://www.geneontology.org/GO.downloads.database.shtml', -title => 'Download termdb' }, 'GO Database Downloads' ) . ' page.' ), $q->li( 'Unzip the .tar.gz file and locate two files: term.txt and term_definition.txt.' ), $q->li( 'Upload these files to Segex, starting with term.txt.' ) ) ), $q->start_form( -accept_charset => 'ISO-8859-1', -method => 'POST', -enctype => 'multipart/form-data', -action => $q->url( absolute => 1 ) . '?a=uploadGO' ), $q->dl( $q->dt('Path to term.txt:'), $q->dd( $q->filefield( -name => 'file', -title => 'File path to term.txt file containing GO terms' ) ), $q->dt(' '), $q->dd( $q->submit( -name => 'b', -value => 'Upload Terms', -class => 'button black bigrounded' ) ) ), $q->end_form(); } 1; __END__ =head1 NAME SGX::UploadGO =head1 SYNOPSIS =head1 DESCRIPTION =head1 AUTHORS Eugene Scherba Michael McDuffie =head1 SEE ALSO =head1 COPYRIGHT =head1 LICENSE Artistic License 2.0 http://www.opensource.org/licenses/artistic-license-2.0.php =cut