Changeset 24 for trunk/grails-app/services/nl
- Timestamp:
- Mar 23, 2011, 1:24:24 PM (12 years ago)
- Location:
- trunk/grails-app/services/nl/tno/metagenomics
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/grails-app/services/nl/tno/metagenomics/FastaService.groovy
r13 r24 405 405 * @return 406 406 */ 407 public def export( List assaySamples, OutputStream outStream, String name ) {407 public def export( List assaySamples, OutputStream outStream, String name = null ) { 408 408 if( !assaySamples || assaySamples.size() == 0 ) 409 409 return false; 410 411 // Retrieve the filename from configuration, if none is given 412 if( !name ) 413 name = ConfigurationHolder.config.metagenomics.exportFilename 410 414 411 415 // Determine the directory the uploaded files are stored in … … 421 425 // Determine new tag length. Since we can use 4 characters per 422 426 // tag position, we only have to use 4log( #samples) 423 int tagLength = Math.ceil( Math.log( assaySamples.size() ) / Math.log( 4 ) ) 427 // The minimum number of characters used is 10, to ensure the correct working of the other 428 // programs. 429 int tagLength = Math.max( 10, Math.ceil( Math.log( assaySamples.size() ) / Math.log( 4 ) ) ); 424 430 int tagNumber = 0; 425 431 … … 430 436 431 437 // Save the tag for exporting 432 tags << [assaySampleId: assaySample.id, sampleName: assaySample.sample.name, assayName: assaySample.assay.name, studyName: assaySample.assay.study.name, tag: tag] 438 tags << [ assaySampleId: assaySample.id, sampleName: assaySample.sample.name, 439 assayName: assaySample.assay.name, studyName: assaySample.assay.study.name, 440 forwardPrimer: assaySample.fwPrimerSeq, reversePrimer: assaySample.revPrimerSeq, 441 tag: tag 442 ]; 433 443 } 434 444 } … … 441 451 // file and part of the qual files mixed. We have to write the full sequence file first. 
442 452 try { 443 zipFile.putNextEntry( new ZipEntry( name + ".fasta" ) ); 453 zipFile.putNextEntry( new ZipEntry( name + ".fna" ) ); 444 454 445 455 assaySamples.each { assaySample -> … … 484 494 485 495 // Export a tab delimited file with tags 486 zipFile.putNextEntry( new ZipEntry( name + "_sample-tag.tab" ) ); 487 exportSampleTagFile( tags, zipWriter ); 496 zipFile.putNextEntry( new ZipEntry( name + ".tab" ) ); 497 exportTabDelimitedSampleTagFile( tags, zipWriter ); 488 498 zipWriter.flush(); 489 499 zipFile.closeEntry(); 490 500 501 // Export a mothur file with tags 502 zipFile.putNextEntry( new ZipEntry( name + ".oligos" ) ); 503 exportMothurSampleTagFile( tags, zipWriter ); 504 zipWriter.flush(); 505 zipFile.closeEntry(); 506 491 507 // Export an excel file with information about the samples 492 508 zipFile.putNextEntry( new ZipEntry( name + ".xls" ) ); … … 498 514 499 515 /** 500 * Creates a tab delimited file with two columns and column headers "Sequence" and "Samplename" 516 * Creates an oligos file for Mothur that represents the connection between samples 517 * and the artificial tags. 518 * 519 * @see http://www.mothur.org/wiki/Trim.seqs#allfiles 501 520 * @param tags Map with newly created tags 502 521 * @param zipWriter Writer to write the data to 503 522 */ 504 protected void exportSampleTagFile( List tags, Writer zipWriter ) { 505 zipWriter.write( "Sequence" + "\t" + "Samplename" + "\n" ); 523 protected void exportMothurSampleTagFile( List tags, Writer zipWriter ) { 524 // Add the forward and reverse primers, as found in the assaysamples 525 // The primers are already cut off, so they are not relevant anymore 526 // For that reason, a '#' is prepended to each line.
527 def fwPrimers = tags.collect { it.forwardPrimer }.findAll { it }.unique(); 528 def revPrimers = tags.collect { it.reversePrimer }.findAll { it }.unique(); 529 530 fwPrimers.each { zipWriter.write( "#forward\t" + it + "\n" ) } 531 revPrimers.each { zipWriter.write( "#reverse\t" + it + "\n" ) } 506 532 507 533 // Check whether the sample names are unique. If they aren't, the assay and study names … … 519 545 520 546 tags.each { 521 zipWriter.write( it.tag + "\t" + it.uniqueName + "\n" ); 522 } 523 } 524 547 zipWriter.write( "barcode\t" + it.tag + "\t" + it.uniqueName + "\n" ); 548 } 549 } 550 551 /** 552 * Creates a tab delimited file with two columns and column headers "Sequence" and "Samplename" 553 * @param tags Map with newly created tags 554 * @param zipWriter Writer to write the data to 555 */ 556 protected void exportTabDelimitedSampleTagFile( List tags, Writer zipWriter ) { 557 zipWriter.write( "Sequence" + "\t" + "Samplename" + "\n" ); 558 559 // Check whether the sample names are unique. If they aren't, the assay and study names 560 // are appended to the sample name 561 def sampleNames = tags*.sampleNames; 562 if( sampleNames.unique().size() < sampleNames.size() ) { 563 tags.each { 564 it.uniqueName = it.sampleName + " (" + it.assayName + " / " + it.studyName + ")"; 565 } 566 } else { 567 tags.each { 568 it.uniqueName = it.sampleName; 569 } 570 } 571 572 tags.each { 573 zipWriter.write( it.tag + "\t" + it.uniqueName + "\n" ); 574 } 575 } 576 577 525 578 /** 526 579 * Creates a unique tag for the given number … … 541 594 int currentChar = tagNumber % numChars 542 595 543 tag = chars[ currentChar ] + tag; 596 // Append the new character to the end of the tag, to ensure that the first part of the tag is 597 // the most volatile. This way it is easy to find the end of the tag and the beginning of the real 598 // sequence on first sight. 
599 tag = tag + chars[ currentChar ]; 544 600 545 601 tagNumber = Math.floor( tagNumber / numChars ); 546 602 } 547 603 548 return tag 604 return tag; 549 605 } 550 606 -
trunk/grails-app/services/nl/tno/metagenomics/SampleExcelService.groovy
r21 r24 11 11 12 12 // Fields to be edited using excel file and manually 13 def variableFields = [ 14 'fwOligo': 'Forward oligo number', 15 'fwMidName': 'Forward mid name', 16 'fwTotalSeq': 'Total forward sequence', 17 'fwMidSeq': 'Forward mid sequence', 18 'fwPrimerSeq': 'Forward primer sequence', 19 'revOligo': 'Reverse oligo number', 20 'revMidName': 'Reverse mid name', 21 'revTotalSeq': 'Total reverse sequence', 22 'revMidSeq': 'Reverse mid sequence', 23 'revPrimerSeq': 'Reverse primer sequence', 24 25 ] 26 13 27 def sampleNameName = "Sample name" 14 28 def runName = "Run" 15 def tagNameName = "Tag name" 16 def tagSequenceName = "Tag sequence" 17 def oligoNumberName = "Oligo number" 18 def possibleFields = [sampleNameName, runName, tagNameName, tagSequenceName, oligoNumberName] 29 def possibleFields = [sampleNameName, runName] + variableFields.keySet().toList(); 30 def possibleFieldNames = [sampleNameName, runName ] + variableFields.values(); 19 31 20 32 /** … … 35 47 def wb = excelService.create(); 36 48 37 def fields = possibleFields 49 def fields = possibleFieldNames 38 50 if( !includeRun ) 39 51 fields = fields - runName … … 49 61 rowData << assaySample.run?.name 50 62 51 rowData << assaySample.tagName 52 rowData << assaySample.tagSequence 53 rowData << assaySample.oligoNumber 63 variableFields.each { k, v -> 64 rowData << assaySample[ k ]; 65 } 66 54 67 data << rowData; 55 68 } … … 57 70 58 71 // Auto resize columns 59 excelService.autoSizeColumns( wb, sheetIndex, 0..2) 72 excelService.autoSizeColumns( wb, sheetIndex, 0..fields.size()) 60 73 61 74 return wb; … … 82 95 // Try to guess best matches between the excel file and the column names 83 96 def bestMatches = [:] 84 def fields = possibleFields 97 def fields = possibleFieldNames 85 98 if( !includeRun ) 86 99 fields = fields - runName 87 88 100 89 101 // Do matching using fuzzy search. The 0.8 treshold makes sure that no match if chosen if 90 102 // there is actually no match at all.
91 def matches = fuzzySearchService.mostSimilarUnique( headers, possibleFields, 0.8 );103 def matches = fuzzySearchService.mostSimilarUnique( headers, fields, 0.8 ); 92 104 93 105 headers.eachWithIndex { header, idx -> … … 121 133 def columns = [:] 122 134 def dataMatches = false; 123 possibleField s.each { columnName ->135 possibleFieldNames.each { columnName -> 124 136 def foundColumn = matchColumns.find { it.value == columnName }; 125 137 … … 130 142 } 131 143 132 println columns133 134 144 // A column to match the sample name must be present 135 145 if( columns[ sampleNameName ] == -1 ) { … … 188 198 columns.each { 189 199 if( it.value > -1 ) { 190 switch( it.key ) { 191 case tagNameName: assaySample.tagName = rowData[ it.value ]; break 192 case tagSequenceName: assaySample.tagSequence = rowData[ it.value ]; break 193 case oligoNumberName: assaySample.oligoNumber = rowData[ it.value ]; break 194 case runName: assaySample.run = Run.findByName( rowData[ it.value ] ); break 200 if( it.key == runName ) { 201 assaySample.run = Run.findByName( rowData[ it.value ] ); 202 } else { 203 def field = variableFields.find { variableField -> variableField.value == it.key }; 204 if( field ) { 205 assaySample[ field.key ] = rowData[ it.value ]; 206 } 195 207 } 196 208 } … … 245 257 def subjectFields = [] 246 258 def eventFields = [] 247 def moduleFields = [ "Sample name", "Assay name", "Study name", "Run name", "# sequences", "Artificial tag sequence" , "Original tag sequence", "Tag name", "Oligo number" ]259 def moduleFields = [ "Sample name", "Assay name", "Study name", "Run name", "# sequences", "Artificial tag sequence" ] + variableFields.values(); 248 260 gscfData.each { sample -> 249 261 sample.each { key, value -> … … 281 293 assaySample.numSequences(), 282 294 currentTag?.tag, 283 assaySample.tagName,284 assaySample.tagSequence,285 assaySample.oligoNumber286 295 ] 296 297 // Add the variable fields for all assaysamples 298 variableFields.each { k, v -> 299 row << 
assaySample[ k ]; 300 } 287 301 288 302 // Afterwards add the gscfData including subject and event data … … 322 336 excelService.writeData( wb, data, sheetIndex, 0 ); 323 337 324 // Auto resize columns 325 excelService.autoSizeColumns( wb, sheetIndex, 0.. fields[ "names" ][ "all" ].size()-1)338 // Auto resize columns (# columns = # samples + 1) 339 excelService.autoSizeColumns( wb, sheetIndex, 0..assaySamples?.size()) 326 340 327 341 // Write the data to the output stream -
trunk/grails-app/services/nl/tno/metagenomics/files/ExcelService.groovy
r20 r24 185 185 * @return 186 186 */ 187 public Workbook autoSizeColumns( Workbook book, int sheetIndex = 0, def columns = 0 ) { 187 public Workbook autoSizeColumns( Workbook book, int sheetIndex = 0, def columns = 0 ) { 188 188 if( book == null ) 189 189 throw new Exception( "No workbook given." ); -
trunk/grails-app/services/nl/tno/metagenomics/integration/SynchronizationService.groovy
r16 r24 634 634 // Update the sample object if necessary 635 635 sampleFound.name = gscfSample.name 636 sampleFound.subject = gscfSample.subject.toString() 637 sampleFound.event = gscfSample.event.toString() + ( gscfSample.startTime ? " (" + gscfSample.startTime + ")" : "" ) 636 setSubjectAndEventFromGSCF( sampleFound, gscfSample ); 638 637 sampleFound.save(); 639 638 } else { … … 648 647 // Update the sample object if necessary 649 648 sampleFound.name = gscfSample.name 650 sampleFound.subject = gscfSample.subject.toString() 651 sampleFound.event = gscfSample.event.toString() + ( gscfSample.startTime ? " (" + gscfSample.startTime + ")" : "" ) 649 setSubjectAndEventFromGSCF( sampleFound, gscfSample ); 652 650 sampleFound.save(); 653 651 } else { … … 656 654 // If it doesn't exist, create a new object 657 655 sampleFound = new Sample( sampleToken: gscfSample.sampleToken, name: gscfSample.name, study: assay.study ); 658 sampleFound.subject = gscfSample.subject.toString() 659 sampleFound.event = gscfSample.event.toString() + ( gscfSample.startTime ? " (" + gscfSample.startTime + ")" : "" ) 656 setSubjectAndEventFromGSCF( sampleFound, gscfSample ); 660 657 assay.study.addToSamples( sampleFound ); 661 658 sampleFound.save(); … … 673 670 } 674 671 } 672 } 673 674 /** 675 * Copies the subject and event properties from the gscf sample to the local sample 676 * @param sample Sample object to update 677 * @param gscfSample Map with properties about the gscf sample ('subject', 'event' and 'startTime' are used) 678 */ 679 private void setSubjectAndEventFromGSCF( sample, gscfSample ) { 680 sample.subject = gscfSample.subject && gscfSample.subject != "null" ? gscfSample.subject.toString() : "" 681 682 sample.event = gscfSample.event && gscfSample.event != "null" ? gscfSample.event.toString() : "" 683 684 if( gscfSample.startTime && gscfSample.startTime != "null" ) 685 sample.event += " (" + gscfSample.startTime + ")"; 675 686 } 676 687
Note: See TracChangeset for help on using the changeset viewer.