1 | /** |
---|
2 | * AssayService Service |
---|
3 | * |
---|
4 | * @author s.h.sikkema@gmail.com |
---|
5 | * @since 20101216 |
---|
6 | * @package dbnp.studycapturing |
---|
7 | * |
---|
8 | * Revision information: |
---|
9 | * $Rev: 2158 $ |
---|
10 | * $Author: work@osx.eu $ |
---|
11 | * $Date: 2012-01-30 15:58:25 +0000 (ma, 30 jan 2012) $ |
---|
12 | */ |
---|
13 | package dbnp.studycapturing |
---|
14 | |
---|
15 | import org.apache.poi.ss.usermodel.* |
---|
16 | import org.apache.poi.xssf.usermodel.XSSFWorkbook |
---|
17 | import org.apache.poi.hssf.usermodel.HSSFWorkbook |
---|
18 | import org.codehaus.groovy.grails.web.json.JSONObject |
---|
19 | import org.dbnp.gdt.RelTime |
---|
20 | import org.dbnp.gdt.TemplateFieldType |
---|
21 | import java.text.NumberFormat |
---|
22 | import dbnp.authentication.SecUser |
---|
23 | |
---|
24 | class AssayService { |
---|
25 | |
---|
26 | boolean transactional = false |
---|
27 | def authenticationService |
---|
28 | def moduleCommunicationService |
---|
29 | |
---|
/**
 * Builds an overview, per category, of the template fields that are actually
 * in use for an assay, together with the measurement names reported by the
 * assay's module.
 *
 * A template field is listed only if at least one of the entities has a
 * non-null value for it. The display name of a field is its name, followed
 * by the unit between parentheses when the field has a unit.
 *
 * @param assay       the assay for which to collect the fields
 * @param samples     list of samples to retrieve the field names for. If not
 *                    given (null or empty), all samples from the assay are used.
 * @param remoteUser  user that is passed on to the module call (optional)
 * @return a map with the keys 'Subject Data', 'Sampling Event Data',
 *         'Sample Data', 'Event Group', 'Module Measurement Data' and
 *         'Module Error'. When the module call fails, the exception message
 *         ends up under 'Module Error' and the measurement list stays empty.
 */
def collectAssayTemplateFields(assay, samples, SecUser remoteUser = null) throws Exception {

    // Describes every unique template field that holds a value on at least
    // one of the given entities
    def describeUsedFields = { entities ->

        def candidateFields = entities*.giveFields().flatten().unique()

        def usedFields = candidateFields.findAll { candidate ->
            candidate && entities.any { entity ->
                entity?.fieldExists(candidate.name) && entity.getFieldValue(candidate.name) != null
            }
        }

        usedFields.collect { used ->
            def unitSuffix = used.unit ? " ($used.unit)" : ''
            [name: used.name, comment: used.comment, displayName: used.name + unitSuffix]
        }
    }

    // Ask the module first; a failure is reported in the result, not thrown
    def moduleMeasurements = []
    def moduleError = ''

    try {
        moduleMeasurements = requestModuleMeasurementNames(assay, remoteUser)
    } catch (e) {
        moduleError = e.message
    }

    if (!samples) {
        samples = assay.samples
    }

    def fieldsPerCategory = [:]
    fieldsPerCategory['Subject Data'] = describeUsedFields(samples*.parentSubject.unique())
    fieldsPerCategory['Sampling Event Data'] = describeUsedFields(samples*.parentEvent.unique())
    fieldsPerCategory['Sample Data'] = describeUsedFields(samples)
    fieldsPerCategory['Event Group'] = [
        [name: 'name', comment: 'Name of Event Group', displayName: 'name']
    ]
    fieldsPerCategory['Module Measurement Data'] = moduleMeasurements
    fieldsPerCategory['Module Error'] = moduleError

    return fieldsPerCategory
}
---|
73 | |
---|
/**
 * Gathers all assay related data, including measurements from the module,
 * into one map containing: Subject Data, Sampling Event Data, Sample Data,
 * Event Group, Module Measurement Data and Module Error.
 * The data categories are themselves maps with a descriptive header (the
 * field's display name) as key and the list of values (one per sample, in
 * sample order) as value.
 *
 * Field values that are null, or fields that do not exist on an entity, are
 * returned as an empty string. Numbers are returned as numbers; all other
 * values are converted to their string representation.
 *
 * @param assay              the assay to collect data for
 * @param fieldMap           map with categories as keys and fields as values
 * @param measurementTokens  selection of measurementTokens. If empty, the
 *                           module is not contacted.
 * @param samples            list of samples for which the data should be retrieved.
 *                           Supply [] or null to include all samples of the assay.
 *                           The samples are sorted by name.
 * @param remoteUser         user that is passed on to the module call (optional)
 * @return The assay data structure as described above. If the module call
 *         fails, 'Module Error' holds the exception message and
 *         'Module Measurement Data' holds an 'error' column with a
 *         placeholder text for every sample.
 */
def collectAssayData(assay, fieldMap, measurementTokens, samples, SecUser remoteUser = null) throws Exception {

    // Returns a map with, for each header field, the values of that field
    // for all given template entities
    def collectFieldValuesForTemplateEntities = { headerFields, templateEntities ->

        headerFields.inject([:]) { map, headerField ->

            map[headerField.displayName] = templateEntities.collect { entity ->

                // default to an empty string
                def val = ''

                if (entity) {
                    try {
                        val = entity.getFieldValue(headerField.name)

                        // Convert RelTime fields to human readable strings.
                        // Empty (null) values are skipped: there is nothing to convert.
                        def field = entity.getField(headerField.name)
                        if (val != null && field?.type == TemplateFieldType.RELTIME)
                            val = new RelTime( val as long )

                    } catch (NoSuchFieldException e) {
                        // The entity doesn't have this field: keep the value found so far
                    }
                }

                // A field without a value must not show up as the text "null"
                if (val == null) val = ''

                (val instanceof Number) ? val : val.toString()
            }

            map
        }
    }

    def getFieldValues = { templateEntities, headerFields, propertyName = '' ->

        def returnValue

        // if no property name is given, simply collect the fields and
        // values of the template entities themselves
        if (propertyName == '') {

            returnValue = collectFieldValuesForTemplateEntities(headerFields, templateEntities)

        } else {

            // if a property name is given, the properties referred to by
            // propertyName can include duplicates. For example, for 10
            // samples, there may be only 1 parent subject. The field values
            // are collected only once per unique property.
            def uniqueProperties = templateEntities*."$propertyName".unique()

            def fieldValues = collectFieldValuesForTemplateEntities(headerFields, uniqueProperties)

            // lookup map from a property (e.g. a sample's parent subject)
            // to its index in the field value lists
            int i = 0
            def propertyToFieldValueIndexMap = uniqueProperties.inject([:]) { map, item ->
                map[item] = i++
                map
            }

            // prepare the return value so that it has an entry for each field
            // name. This will be the column name (second header line).
            returnValue = headerFields*.displayName.inject([:]) { map, item ->
                map[item] = []
                map
            }

            // finally, map the unique field values back onto the (possibly
            // not unique) template entity properties.
            templateEntities.each { te ->
                headerFields*.displayName.each {
                    returnValue[it] << fieldValues[it][propertyToFieldValueIndexMap[te[propertyName]]]
                }
            }
        }
        returnValue
    }

    // Find samples and sort by name
    if ( !samples ) samples = assay.samples.toList()
    samples = samples.sort { it.name }

    def eventFieldMap = [:]

    // check whether event group data was requested
    if (fieldMap['Event Group']) {

        def names = samples*.parentEventGroup*.name.flatten()

        // only set name field when there's actual data
        if (!names.every {!it}) eventFieldMap['name'] = names
    }

    def moduleError = '', moduleMeasurementData = [:]

    if (measurementTokens) {

        try {
            moduleMeasurementData = requestModuleMeasurements(assay, measurementTokens, samples, remoteUser)
        } catch (e) {
            // Report the problem in the data itself, so the export still succeeds
            moduleMeasurementData = ['error' : [
                    'Module error, module not available or unknown assay']
                    * samples.size() ]
            log.error( "Module measurements could not be retrieved", e )
            moduleError = e.message
        }
    }

    [   'Subject Data' :            getFieldValues(samples, fieldMap['Subject Data'], 'parentSubject'),
        'Sampling Event Data' :     getFieldValues(samples, fieldMap['Sampling Event Data'], 'parentEvent'),
        'Sample Data' :             getFieldValues(samples, fieldMap['Sample Data']),
        'Event Group' :             eventFieldMap,
        'Module Measurement Data' : moduleMeasurementData,
        'Module Error' :            moduleError
    ]
}
---|
213 | |
---|
/**
 * Prepends the field values of the assay's study (assay.parent) to the data
 * structure, as a category named 'Study Data'.
 *
 * Only fields whose value evaluates to true (Groovy truth) are included. Each
 * included value is repeated numValues times, so the study columns have
 * numValues entries.
 *
 * @param inputData  Column wise data structure of samples
 * @param assay      Assay object whose study the data is taken from. If no
 *                   assay is given, inputData is returned unchanged.
 * @param numValues  Number of values for this assay
 * @return Extended column wise data structure
 */
def prependStudyData( inputData, Assay assay, numValues ) {
    if( assay ) {
        def study = assay.parent

        // One column per filled-in study field, the same value on every row
        def studyColumns = [:]
        for( field in study?.giveFields() ) {
            def fieldValue = study.getFieldValue( field.name )
            if( fieldValue ) {
                studyColumns[ field.name ] = [fieldValue] * numValues
            }
        }

        return [ 'Study Data': studyColumns ] + inputData
    }

    return inputData
}
---|
237 | |
---|
/**
 * Prepends the field values of the assay itself to the data structure, as a
 * category named 'Assay Data'.
 *
 * Only fields whose value evaluates to true (Groovy truth) are included. Each
 * included value is repeated numValues times, so the assay columns have
 * numValues entries.
 *
 * @param inputData  Column wise data structure of samples
 * @param assay      Assay object the data should be selected from. If no
 *                   assay is given, inputData is returned unchanged.
 * @param numValues  Number of values for this assay
 * @return Extended column wise data structure
 */
def prependAssayData( inputData, Assay assay, numValues ) {
    if( assay ) {
        // One column per filled-in assay field, the same value on every row
        def assayColumns = [:]
        for( field in assay.giveFields() ) {
            def fieldValue = assay.getFieldValue( field.name )
            if( fieldValue ) {
                assayColumns[ field.name ] = [fieldValue] * numValues
            }
        }

        return [ 'Assay Data': assayColumns ] + inputData
    }

    return inputData
}
---|
261 | |
---|
/**
 * Retrieves the measurement names of an assay from its module through a
 * rest call (getMeasurements).
 *
 * @param assay       assay to retrieve the measurement names for; its module
 *                    url determines which module is called
 * @param remoteUser  user that is passed on to the module call (optional)
 * @return list with the measurement names as strings. JSON null values are
 *         returned as empty strings.
 * @throws Exception  if the module call fails. The original exception is
 *                    available as the cause.
 */
def requestModuleMeasurementNames(assay, SecUser remoteUser = null) {

    def moduleUrl = assay.module.url

    def path = moduleUrl + "/rest/getMeasurements/query"
    def query = "assayToken=${assay.giveUUID()}"
    def jsonArray

    try {
        jsonArray = moduleCommunicationService.callModuleMethod(moduleUrl, path, query, "POST", remoteUser)
    } catch (e) {
        String message = "An error occured while trying to get the measurement tokens from the ${assay.module.name}. " +
            "This means the module containing the measurement data is not available right now. Please try again " +
            "later or notify the system administrator if the problem persists. URL: ${path}?${query}."

        // Keep the original exception as cause, so the real reason is not lost
        throw new Exception(message, e)
    }

    return jsonArray.collect {
        it == JSONObject.NULL ? "" : it.toString()
    }
}
---|
294 | |
---|
/**
 * Retrieves module measurement data through a rest call to the module
 * (getMeasurementData) and converts it into a map like:
 *
 * [ "measurement 1": [ value1, value2, value3 ],
 *   "measurement 2": [ value4, value5, value6 ] ]
 *
 * The values of each measurement are in the same order as the given samples.
 * A sample for which the module returned no sample token gets a null value.
 * JSON null values are returned as empty strings, numbers as numbers, text
 * that represents a double as a double and everything else as a string.
 *
 * @param assay                   Assay for which the module measurements should be retrieved
 * @param inputMeasurementTokens  List with the names of the fields to be retrieved. Format: [ 'measurementName1', 'measurementName2' ]
 * @param samples                 Samples to collect measurements for
 * @param remoteUser              user that is passed on to the module call (optional)
 * @return map as described above, or an empty list if the module returned no sample tokens
 * @throws Exception  if the module call fails. The original exception is
 *                    available as the cause.
 */
def requestModuleMeasurements(assay, inputMeasurementTokens, samples, SecUser remoteUser = null) {

    def moduleUrl = assay.module.url

    def tokenString = ''
    inputMeasurementTokens.each { tokenString += "&measurementToken=${it.encodeAsURL()}" }

    /* Contact module to fetch measurement data */
    def path = moduleUrl + "/rest/getMeasurementData/query"
    def query = "assayToken=$assay.assayUUID$tokenString"

    if (samples) {
        query += '&' + samples*.sampleUUID.collect { "sampleToken=$it" }.join('&')
    }

    def sampleTokens = [], measurementTokens = [], moduleData = []

    try {
        (sampleTokens, measurementTokens, moduleData) = moduleCommunicationService.callModuleMethod(moduleUrl, path, query, "POST", remoteUser)
    } catch (e) {
        String message = "An error occured while trying to get the measurement data from the ${assay.module.name}. " +
            "This means the module containing the measurement data is not available right now. Please try again " +
            "later or notify the system administrator if the problem persists. URL: ${path}?${query}."

        log.error( message, e )

        // Keep the original exception as cause, so the real reason is not lost
        throw new Exception(message, e)
    }

    if (!sampleTokens?.size()) return []

    // Position of each sample token in the module response, keyed by the
    // string form of the token. Both the existence check and the position
    // lookup use this same key, so they can never disagree. For duplicate
    // tokens the first position wins.
    def tokenPositions = [:]
    sampleTokens.eachWithIndex { token, position ->
        def key = token.toString()
        if( !tokenPositions.containsKey( key ) )
            tokenPositions[ key ] = position
    }

    // Position in the module response for every requested sample (null if
    // the module did not return data for that sample). This is the same for
    // all measurements, so it is determined only once.
    def samplePositions = samples.collect { sample ->
        tokenPositions[ sample.giveUUID()?.toString() ]
    }

    def numSampleTokens = sampleTokens.size()
    def map = [:]

    measurementTokens.eachWithIndex { measurementToken, measurementIndex ->
        map[ measurementToken.toString() ] = samplePositions.collect { tokenIndex ->

            // No measurements exist for this sample
            if( tokenIndex == null )
                return null

            // The module data is one flat list: all sample values of the
            // first measurement, followed by those of the second, etc.
            def valueIndex = measurementIndex * numSampleTokens + tokenIndex

            // If the module data is in the wrong format, show an error in the log file
            // and return a null value for this measurement.
            if( valueIndex >= moduleData.size() ) {
                log.error "Module measurements given by module " + assay.module.name + " are not in the right format: " + measurementTokens?.size() + " measurements, " + sampleTokens?.size() + " samples, " + moduleData?.size() + " values"
                return null
            }

            def measurement = moduleData[ valueIndex ]

            if( measurement == null || measurement == JSONObject.NULL )
                return ""
            if( measurement instanceof Number )
                return measurement

            // Anything else is judged by its text, so values that are not
            // strings can't break the conversion
            def text = measurement.toString()
            text.isDouble() ? text.toDouble() : text
        }
    }

    return map
}
---|
377 | |
---|
/**
 * Retrieves module measurement meta data through a rest call to the module
 * (getMeasurementMetaData).
 *
 * @param assay                   Assay for which the module measurements should be retrieved
 * @param inputMeasurementTokens  List with the names of the fields to be retrieved. Format: [ 'measurementName1', 'measurementName2' ]
 * @param remoteUser              user that is passed on to the module call (optional)
 * @return map with the 'name' of each returned meta data entry as key and the
 *         complete entry as value. Entries without a name are skipped.
 * @throws Exception  if the module call or the processing of its result
 *                    fails. The original exception is available as the cause.
 */
def requestModuleMeasurementMetaDatas(assay, inputMeasurementTokens, SecUser remoteUser = null) {

    def measurementTokenMetaData = [:]

    def moduleUrl = assay.module.url

    def tokenString = ''
    inputMeasurementTokens.each { tokenString += "&measurementToken=${it.encodeAsURL()}" }

    def pathMeasurementMetaData = moduleUrl + "/rest/getMeasurementMetaData/"
    def queryMeasurementMetaData = "assayToken=$assay.assayUUID$tokenString"

    try {
        moduleCommunicationService.callModuleMethod(moduleUrl, pathMeasurementMetaData, queryMeasurementMetaData, "POST", remoteUser).each { metaDataArray ->
            if (metaDataArray['name']) {
                // convert list to an associative array
                measurementTokenMetaData[metaDataArray['name']] = metaDataArray
            }
        }
    } catch (e) {
        // The url in the message is built from the variables of this method
        String message = "An error occured while trying to get the measurement meta data from the ${assay.module.name}. " +
            "This means the module containing the measurement data is not available right now. Please try again " +
            "later or notify the system administrator if the problem persists. URL: ${pathMeasurementMetaData}?${queryMeasurementMetaData}."

        log.error( message, e )

        // Keep the original exception as cause, so the real reason is not lost
        throw new Exception(message, e)
    }

    return measurementTokenMetaData
}
---|
412 | |
---|
/**
 * Modules can provide meta-data about the measurements. If so they can be added to the export.
 * This method extends the row wise data with the meta-data.
 *
 * The first row of the data must contain the categories and the second row
 * the column (feature) names. For every meta-data property a row is inserted
 * directly below the column names. Such a row has the property name in the
 * column where the category 'Module Measurement Data' starts, followed by the
 * value of that property for each column from that position on (null if no
 * meta-data is known for a column). To make room for the property names, an
 * empty (null) cell is inserted at that column in all existing rows except
 * the category row.
 *
 * @param data      row wise data: a list of rows, each row being a list of cell values
 * @param metadata  map with the feature name as key and a map of properties
 *                  (property name to value) as value
 * @return data + meta-data. The data is returned unchanged if there is no
 *         meta-data, if none of the columns has meta-data, or if there is no
 *         'Module Measurement Data' category.
 */
def mergeModuleDataWithMetadata(data, metadata = null){

    if (metadata == null){
        return data
    }

    //check if there is meta-data available for the features in the data
    if (!(data[1].intersect(metadata.keySet()))){
        return data
    }

    //find out where the measurements start in the data
    def addLabelsAtColumnPosition = data[0].findLastIndexOf { it == 'Module Measurement Data' }

    //check if we have a position to inject the labels
    if (addLabelsAtColumnPosition < 0){
        return data //returning data as we were unable to find where the measurements start
    }

    //find out all (unique) feature properties
    def featureProperties = []
    metadata.values().each { featureProperties += it.keySet() }
    featureProperties = featureProperties.unique()

    //prepare the additional rows with meta-data
    def additionalRows = featureProperties.collect { mdfp ->

        //empty fields before the label of the meta-data property; none at
        //all when the measurements start in the very first column
        def addRow = [null] * addLabelsAtColumnPosition
        addRow << mdfp

        data[1].eachWithIndex { feature, i ->
            //columns without meta-data get an empty cell
            if (i >= addLabelsAtColumnPosition) { addRow << metadata[feature]?.get(mdfp) }
        }

        addRow
    }

    if (!additionalRows){
        return data
    }

    //add the additional rows and add a null to the feature label column in the data rows
    def tempData = []
    data.eachWithIndex { row, iRow ->

        //this is an existing row (not meta-data), so we add a null under the feature label column
        def tempR = []
        row.eachWithIndex { rowElement, iRowElement ->
            if ((iRow != 0) && (iRowElement == addLabelsAtColumnPosition)){
                tempR.add(null) //add an empty element under the meta-data label
            }
            tempR.add(rowElement)
        }
        tempData.add(tempR)

        //the additional rows are already in the correct format, so they only
        //have to be added below the row that holds the column names
        if (iRow == 1){
            tempData.addAll(additionalRows)
        }
    }

    return tempData
}
---|
491 | |
---|
/**
 * Merges the column wise data of multiple assays into one structure that can be exported to an excel file.
 * The format for each assay is
 *
 * [Category1:
 *      [Column1: [1,2,3], Column2: [4,5,6]],
 *  Category2:
 *      [Column3: [7,8,9], Column4: [10,11,12], Column5: [13,14,15]]]
 *
 * Where the category describes the category of data that is presented (e.g. subject, sample etc.) and the
 * column names describe the fields that are present. Each entry in the lists shows the value for that column
 * for an entity. In this case, 3 entities are described. Each field should give values for all entities, so
 * the length of all value-lists should be the same.
 *
 * Example: If the following input is given (2 assays)
 *
 * [
 *   [Category1:
 *      [Column1: [1,2,3], Column2: [4,5,6]],
 *    Category2:
 *      [Column3: [7,8,9], Column4: [10,11,12], Column5: [13,14,15]]],
 *   [Category1:
 *      [Column1: [16,17], Column6: [18,19]],
 *    Category3:
 *      [Column3: [20,21], Column8: [22,23]]]
 * ]
 *
 * the output will be (5 entries for each column, empty values for fields that don't exist in some assays)
 *
 * [Category1:
 *      [Column1: [1,2,3,16,17], Column2: [4,5,6,,], Column6: [,,,18,19]],
 *  Category2:
 *      [Column3: [7,8,9,,], Column4: [10,11,12,,], Column5: [13,14,15,,]],
 *  Category3:
 *      [Column3: [,,,20,21], Column8: [,,,22,23]]
 * ]
 *
 * The number of empty values that is added for an assay equals the size of the first non-empty column
 * found in that assay (0 if the assay has no non-empty columns).
 *
 * @param columnWiseAssayData  List with each entry being the column wise data of an assay. The format for each
 *                             entry is described above
 * @return Map with the combined assay data, in the same structure as each input entry. Empty values are given
 *         as an empty string.
 */
def mergeColumnWiseDataOfMultipleStudies(def columnWiseAssayData) {
    // Number of entities per assay, taken from the first column that holds values.
    // It determines how many empty strings to add for a field the assay doesn't have.
    def entityCounts = columnWiseAssayData.collect { assayColumns ->
        def firstFilledColumn = null
        assayColumns.each { categoryEntry ->
            if( firstFilledColumn == null && categoryEntry ) {
                firstFilledColumn = categoryEntry.value.find { it?.value?.size() > 0 }
            }
        }

        firstFilledColumn != null ? firstFilledColumn.value.size() : 0
    }

    // All categories that occur in any of the assays
    def allCategories = columnWiseAssayData*.keySet().toList().flatten().unique()

    def merged = [:]
    for( category in allCategories ) {
        // The data of this category for each assay (null if the assay doesn't have it)
        def perAssay = columnWiseAssayData*.getAt( category )

        // All fields that occur in this category in any of the assays
        def fieldNames = perAssay.findAll { it }*.keySet().toList().flatten().unique()

        def columns = [:]
        for( fieldName in fieldNames ) {
            def combined = []

            perAssay.eachWithIndex { assayColumns, assayIndex ->
                boolean present = assayColumns && assayColumns.containsKey( fieldName )

                // Existing values are appended as they are; a missing field
                // yields an empty string for each entity of that assay
                combined += present ? assayColumns[ fieldName ] : [""] * entityCounts[ assayIndex ]
            }

            columns[ fieldName ] = combined
        }

        merged[ category ] = columns
    }

    return merged
}
---|
584 | |
---|
/**
 * Merges the column wise data of multiple assays into one structure that can be exported to an excel file,
 * and combines the rows that describe the same sample into one row.
 *
 * The data of the assays is first merged with mergeColumnWiseDataOfMultipleStudies (see that method for the
 * format of the input and the merged result). After that, rows are matched on the sample id: every row whose
 * id occurred before is merged into the first row with that id and is removed. When merging, a value of the
 * first row is kept, unless it is null or an empty string; in that case the value of the later row is used.
 *
 * @param columnWiseAssayData  List with each entry being the column wise data of an assay. The data MUST have
 *                             a category named 'Sample Data' and in that map a field named 'id'. This field is
 *                             used for matching rows. However, the column is removed, unless removeIdColumn
 *                             is set to false
 * @param removeIdColumn       If set to true (default), the values for the sample id are removed from the output.
 * @return Map with the combined assay data, in the same structure as each input entry. Empty values are given
 *         as an empty string.
 */
def mergeColumnWiseDataOfMultipleStudiesForASetOfSamples(def columnWiseAssayData, boolean removeIdColumn = true ) {
    // Merge all assays and studies into one structure
    def combined = mergeColumnWiseDataOfMultipleStudies( columnWiseAssayData )

    def sampleIds = combined[ "Sample Data" ][ "id" ]
    int rowCount = sampleIds.size()

    // For each sample id, the row on which it occurs for the first time
    def firstRowOfId = [:]

    // For each row, the row it should end up in: the row itself for the first
    // occurrence of an id, the row of that first occurrence otherwise.
    //
    // Example: ids [ 12, 24, 12, 26, 26 ] give [ 0, 1, 0, 3, 3 ]
    // (meaning: rows 0, 1 and 3 should remain, row 2 should be merged with
    // row 0 and row 4 should be merged with row 3)
    def targetRowOf = []

    for( rowIndex in 0..<rowCount ) {
        def sampleId = sampleIds[ rowIndex ]

        if( firstRowOfId[ sampleId ] == null )
            firstRowOfId[ sampleId ] = rowIndex

        targetRowOf << firstRowOfId[ sampleId ]
    }

    // A row is always merged into a row above it. By handling the rows from
    // the bottom up, removing a row never changes the position of a row that
    // still has to be handled.
    for( int row = rowCount - 1; row >= 0; row-- ) {
        int keepRow = targetRowOf[ row ]

        if( keepRow == row )
            continue

        combined.each { categoryEntry ->
            categoryEntry.value.each { columnEntry ->
                def columnValues = columnEntry.value

                // If the value to keep is empty, use the one from the duplicate row.
                // Otherwise the values should be the same (e.g. study, subject, sample data)
                def kept = columnValues[ keepRow ]
                if( kept == null || kept == "" )
                    columnValues[ keepRow ] = columnValues[ row ]

                // Remove the duplicate row from this column
                columnValues.remove( row )
            }
        }
    }

    // Remove sample id if required
    if( removeIdColumn )
        combined[ "Sample Data" ].remove( "id" )

    return combined
}
---|
692 | |
---|
693 | /** |
---|
694 | * Converts column |
---|
695 | * @param columnData multidimensional map containing column data. |
---|
696 | * On the top level, the data must be grouped by category. Each key is the |
---|
697 | * category title and the values are maps representing the columns. Each |
---|
698 | * column also has a title (its key) and a list of values. Columns must be |
---|
699 | * equally sized. |
---|
700 | * |
---|
701 | * For example, consider the following map: |
---|
702 | * [Category1: |
---|
703 | * [Column1: [1,2,3], Column2: [4,5,6]], |
---|
704 | * Category2: |
---|
705 | * [Column3: [7,8,9], Column4: [10,11,12], Column5: [13,14,15]]] |
---|
706 | * |
---|
707 | * which will be written as: |
---|
708 | * |
---|
709 | * | Category1 | | Category2 | | | |
---|
710 | * | Column1 | Column2 | Column3 | Column4 | Column5 | |
---|
711 | * | 1 | 4 | 7 | 10 | 13 | |
---|
712 | * | 2 | 5 | 8 | 11 | 14 | |
---|
713 | * | 3 | 6 | 9 | 12 | 15 | |
---|
714 | * |
---|
715 | * @return row wise data |
---|
716 | */ |
---|
def convertColumnToRowStructure(columnData) {

    // Every column (the values on the second level) has to be a list of cell
    // values. Any List implementation is accepted, not only ArrayList, so
    // that e.g. LinkedList or Arrays.asList() columns are converted as well.
    def wellFormed = columnData.every { category ->
        category.value.every { column -> column.value instanceof List }
    }

    // A malformed structure is signalled to the caller with an empty list
    if (!wellFormed) return []

    // headers[0] holds the category titles, headers[1] the column titles
    def headers = [[], []]

    // the value lists of all columns, in the order in which they are written
    def columns = []

    columnData.each { category ->

        // categories without columns do not contribute any header cells
        if (category.value.size()) {

            // put the category key above its first column and pad with null
            // values wherever the category spans more than one column
            headers[0] += [category.key] + [null] * (category.value.size() - 1)

            // put the column titles into the 2nd row
            headers[1] += category.value.collect { it.key }
        }

        category.value.each { column -> columns << column.value }
    }

    // transpose the columns into row wise data and combine with the header rows
    headers + columns.transpose()
}
---|
749 | |
---|
750 | /** |
---|
751 | * Export column wise data in Excel format to a stream. |
---|
752 | * |
---|
753 | * @param columnData Multidimensional map containing column data |
---|
754 | * @param outputStream Stream to write to |
---|
755 | * @param useOfficeOpenXML Flag to specify xlsx (standard) or xls output |
---|
756 | * @return |
---|
757 | */ |
---|
def exportColumnWiseDataToExcelFile(columnData, outputStream, useOfficeOpenXML = true) {

    // the Excel writer works row by row, so pivot the column wise structure first
    def rowWiseData = convertColumnToRowStructure(columnData)

    // a falsy (e.g. empty) result means the column data could not be converted
    if (!rowWiseData) {
        throw new Exception('Wrong column data format.')
    }

    exportRowWiseDataToExcelFile(rowWiseData, outputStream, useOfficeOpenXML)
}
---|
774 | |
---|
775 | /** |
---|
776 | * Export row wise data in Excel format to a stream |
---|
777 | * |
---|
778 | * @param rowData List of lists containing for each row all cell values |
---|
779 | * @param outputStream Stream to write to |
---|
780 | * @param useOfficeOpenXML Flag to specify xlsx (standard) or xls output |
---|
781 | * @return |
---|
782 | */ |
---|
def exportRowWiseDataToExcelFile(rowData, outputStream, useOfficeOpenXML = true) {

    // pick the workbook implementation: XSSF when useOfficeOpenXML is set, HSSF otherwise
    Workbook workbook
    if (useOfficeOpenXML) {
        workbook = new XSSFWorkbook()
    } else {
        workbook = new HSSFWorkbook()
    }

    // all rows are written onto a single, newly created sheet
    exportRowWiseDataToExcelSheet(rowData, workbook.createSheet())

    // send the workbook to the caller's stream and close that stream afterwards
    workbook.write(outputStream)
    outputStream.close()
}
---|
792 | |
---|
793 | /** |
---|
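794 | * Export row wise data in CSV to a stream. Text values are only surrounded |
---|
795 | * with double quotes (" ") when they contain special characters such as double quotes, commas or newlines; embedded double quotes are doubled. Numbers are formatted using the given locale. |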
---|
796 | * |
---|
797 | * @param rowData List of lists containing for each row all cell values |
---|
798 | * @param outputStream Stream to write to |
---|
799 | * @return |
---|
800 | */ |
---|
def exportRowWiseDataToCSVFile(rowData, outputStream, outputDelimiter = '\t', locale = java.util.Locale.US) {

    def formatter = NumberFormat.getNumberInstance(locale)
    formatter.setGroupingUsed false // we don't want grouping (thousands) separators
    formatter.setMaximumFractionDigits(15)

    // the delimiter as a plain String, so cell values can be searched for it
    def delimiter = outputDelimiter?.toString()

    // surrounds a value with double quotes, doubling any embedded double quotes
    def quote = { String s -> '"' + s.replace('"', '""') + '"' }

    // renders a single cell value as CSV text
    def formatCell = { value ->

        // numbers are formatted using the chosen locale and normally written
        // without quotes; they are quoted only when the formatted text
        // contains the delimiter (e.g. a decimal comma in a comma separated
        // file), because that would otherwise split the value over two columns
        if (value instanceof Number) {
            def number = formatter.format(value)
            return (delimiter && number.contains(delimiter)) ? quote(number) : number
        }

        // null values end up as empty cells
        def s = value?.toString() ?: ''

        // quote whenever the value contains a character that would otherwise
        // be read as structure: a double quote, a comma, a line break or the
        // delimiter that is actually in use
        def needsQuotes = s.contains('"') || s.contains(',') || s.contains('\n') ||
            s.contains('\r') || (delimiter && s.contains(delimiter))

        needsQuotes ? quote(s) : s
    }

    // cells are joined with the delimiter, rows with a newline
    outputStream << rowData.collect { row ->
        row.collect(formatCell).join(outputDelimiter)
    }.join('\n')

    outputStream.close()
}
---|
835 | |
---|
836 | /** |
---|
837 | * Export row wise data for multiple assays in Excel format (separate sheets) to a stream |
---|
838 | * |
---|
839 | * @param assayData List of structures with row wise data, one for each assay |
---|
840 | * @param outputStream Stream to write to |
---|
841 | * @param useOfficeOpenXML Flag to specify xlsx (standard) or xls output |
---|
842 | * @return |
---|
843 | */ |
---|
def exportRowWiseDataForMultipleAssaysToExcelFile(assayData, outputStream, useOfficeOpenXML = true) {

    // pick the workbook implementation: XSSF when useOfficeOpenXML is set, HSSF otherwise
    Workbook workbook
    if (useOfficeOpenXML) {
        workbook = new XSSFWorkbook()
    } else {
        workbook = new HSSFWorkbook()
    }

    // every element of assayData is written onto a newly created sheet of its own
    for (assayRows in assayData) {
        exportRowWiseDataToExcelSheet(assayRows, workbook.createSheet())
    }

    // send the workbook to the caller's stream and close that stream afterwards
    workbook.write(outputStream)
    outputStream.close()
}
---|
856 | |
---|
857 | /** |
---|
858 | * Export row wise data in Excel format to a given sheet in an excel workbook |
---|
859 | * |
---|
860 | * @param rowData List of lists containing for each row all cell values |
---|
861 | * @param sheet Excel sheet to append the rows to |
---|
862 | * @return |
---|
863 | */ |
---|
def exportRowWiseDataToExcelSheet(rowData, Sheet sheet) {

    // create all rows
    rowData.size().times { sheet.createRow it }

    sheet.eachWithIndex { Row row, ri ->

        // rows without any values get no cells
        if (rowData[ri]) {

            // create appropriate number of cells for this row
            rowData[ri].size().times { row.createCell it }

            row.eachWithIndex { Cell cell, ci ->

                def value = rowData[ri][ci]

                // Numbers, Strings and Dates are passed to the cell as is,
                // every other type is written as text. Booleans have always
                // ended up as text ('true' / 'false') as well, because the
                // former check against the primitive boolean.class never
                // matched a boxed Boolean; that output is kept unchanged.
                def writeAsIs = (value instanceof Number) || (value?.getClass() in [String.class, Date.class])
                if (!writeAsIs) value = value?.toString()

                // Only null is replaced by an empty String. An Elvis default
                // (value ?: '') would also blank out numeric zeros, since
                // zero is false in Groovy truth.
                cell.setCellValue(value != null ? value : '')
            }
        }
    }
}
---|
892 | } |
---|