1 | /** |
---|
2 | * AssayService Service |
---|
3 | * |
---|
4 | * @author s.h.sikkema@gmail.com |
---|
5 | * @since 20101216 |
---|
6 | * @package dbnp.studycapturing |
---|
7 | * |
---|
8 | * Revision information: |
---|
9 | * $Rev: 2049 $ |
---|
10 | * $Author: s.h.sikkema@gmail.com $ |
---|
11 | * $Date: 2011-10-06 11:19:21 +0000 (do, 06 okt 2011) $ |
---|
12 | */ |
---|
13 | package dbnp.studycapturing |
---|
14 | |
---|
15 | import org.apache.poi.ss.usermodel.* |
---|
16 | import org.apache.poi.xssf.usermodel.XSSFWorkbook |
---|
17 | import org.apache.poi.hssf.usermodel.HSSFWorkbook |
---|
18 | import org.codehaus.groovy.grails.web.json.JSONObject |
---|
19 | import org.dbnp.gdt.RelTime |
---|
20 | import org.dbnp.gdt.TemplateFieldType |
---|
21 | import java.text.NumberFormat |
---|
22 | import dbnp.authentication.SecUser |
---|
23 | |
---|
24 | class AssayService { |
---|
25 | |
---|
26 | boolean transactional = false |
---|
27 | def authenticationService |
---|
28 | def moduleCommunicationService |
---|
29 | |
---|
/**
 * Builds an overview of the fields that are in use for an assay, grouped
 * per category, together with the measurement names reported by the
 * assay's module.
 *
 * A template field counts as "in use" when at least one of the entities
 * has the field and holds a non-null value for it.
 *
 * @param assay       the assay for which to collect the fields
 * @param samples     samples to inspect; when empty or null, assay.samples is used
 * @param remoteUser  user that is passed on when requesting the module's measurement names (optional)
 * @return map with the categories 'Subject Data', 'Sampling Event Data',
 *         'Sample Data', 'Event Group' and 'Module Measurement Data' as keys
 *         and field descriptions (name, comment, displayName) or measurement
 *         names as values, plus a 'Module Error' entry holding the message of
 *         a failed module request (empty string when the request succeeded)
 */
def collectAssayTemplateFields(assay, samples, SecUser remoteUser = null) throws Exception {

    // Describes every template field that holds a value for at least one entity
    def describeFieldsInUse = { entities ->

        def candidateFields = entities*.giveFields().flatten().unique()

        def fieldsInUse = candidateFields.findAll { candidate ->
            candidate && entities.any { entity ->
                entity?.fieldExists(candidate.name) && entity.getFieldValue(candidate.name) != null
            }
        }

        // the display name carries the unit (if any) between parentheses
        fieldsInUse.collect { usedField ->
            [name: usedField.name, comment: usedField.comment, displayName: usedField.name + (usedField.unit ? " ($usedField.unit)" : '')]
        }
    }

    // A failing module must not break the overview: remember the message instead
    def moduleMeasurements = []
    def moduleError = ''
    try {
        moduleMeasurements = requestModuleMeasurementNames(assay, remoteUser)
    } catch (e) {
        moduleError = e.message
    }

    samples = samples ?: assay.samples

    def fieldsPerCategory = [:]
    fieldsPerCategory['Subject Data'] = describeFieldsInUse( samples*."parentSubject".unique() )
    fieldsPerCategory['Sampling Event Data'] = describeFieldsInUse( samples*."parentEvent".unique() )
    fieldsPerCategory['Sample Data'] = describeFieldsInUse( samples )
    fieldsPerCategory['Event Group'] = [[name: 'name', comment: 'Name of Event Group', displayName: 'name']]
    fieldsPerCategory['Module Measurement Data'] = moduleMeasurements
    fieldsPerCategory['Module Error'] = moduleError

    fieldsPerCategory
}
---|
71 | |
---|
/**
 * Gathers all assay related data, including measurements from the module,
 * into one map with the entries 'Subject Data', 'Sampling Event Data',
 * 'Sample Data', 'Event Group', 'Module Measurement Data' and 'Module Error'.
 * The data entries are themselves maps with a descriptive header (the
 * field's display name) as key and the list of values, one per sample, as
 * value.
 *
 * Field values that are not set (null) are returned as empty strings;
 * numbers are returned as numbers and everything else as its string
 * representation.
 *
 * @param assay              the assay to collect data for
 * @param fieldMap           map with categories as keys and field descriptions
 *                           (as returned by collectAssayTemplateFields) as values
 * @param measurementTokens  selection of measurement tokens; when empty no module data is requested
 * @param samples            list of samples for which the data should be retrieved.
 *                           Defaults to all samples from this assay, sorted by name.
 * @param remoteUser         user that is passed on when requesting module measurements (optional)
 * @return the assay data structure as described above. When the module
 *         request fails, 'Module Measurement Data' holds an 'error' column
 *         and 'Module Error' holds the exception message.
 */
def collectAssayData(assay, fieldMap, measurementTokens, samples = null, SecUser remoteUser = null) throws Exception {

    // Returns a map with, for each header field, the values of all given
    // template entities (in the order of the entity list)
    def collectFieldValuesForTemplateEntities = { headerFields, templateEntities ->

        headerFields.inject([:]) { map, headerField ->

            map + [(headerField.displayName): templateEntities.collect { entity ->

                def val = null

                if (entity) {
                    try {

                        val = entity.getFieldValue(headerField.name)

                        // Convert RelTime fields to human readable strings. Unset
                        // values are skipped: converting null to long would throw.
                        def field = entity.getField(headerField.name)
                        if (val != null && field.type == TemplateFieldType.RELTIME)
                            val = new RelTime( val as long )

                    } catch (NoSuchFieldException e) { /* entity does not have this field: keep the empty default */ }
                }

                // default to an empty string; null.toString() would yield the text "null"
                if (val == null) return ''

                (val instanceof Number) ? val : val.toString()
            }]
        }
    }

    def getFieldValues = { templateEntities, headerFields, propertyName = '' ->

        // if no property name is given, simply collect the fields and
        // values of the template entities themselves
        if (propertyName == '') {
            return collectFieldValuesForTemplateEntities(headerFields, templateEntities)
        }

        // If a property name is given, the properties referred to can include
        // duplicates: for 10 samples there may be only 1 parent subject. The
        // field values are therefore collected once per unique property and
        // copied to every template entity afterwards.
        def uniqueProperties = templateEntities*."$propertyName".unique()

        def fieldValues = collectFieldValuesForTemplateEntities(headerFields, uniqueProperties)

        // lookup from an entity's property (e.g. a sample's parent subject)
        // to its index in the field value lists
        def propertyToFieldValueIndexMap = [:]
        uniqueProperties.eachWithIndex { item, index -> propertyToFieldValueIndexMap[item] = index }

        // one (initially empty) column per display name; this will be the
        // column name (second header line)
        def displayNames = headerFields*.displayName
        def returnValue = displayNames.inject([:]) { map, item -> map + [(item): []] }

        // map the unique field values back to the (possibly not unique)
        // template entity properties
        templateEntities.each { te ->
            def valueIndex = propertyToFieldValueIndexMap[te[propertyName]]
            displayNames.each {
                returnValue[it] << fieldValues[it][valueIndex]
            }
        }

        returnValue
    }

    // Find samples and sort by name
    if( !samples )
        samples = assay.samples.toList().sort { it.name }

    def eventFieldMap = [:]

    // check whether event group data was requested
    if (fieldMap['Event Group']) {

        def names = samples*.parentEventGroup*.name.flatten()

        // only set name field when there's actual data
        if (!names.every {!it}) eventFieldMap['name'] = names
    }

    def moduleError = '', moduleMeasurementData = [:]

    if (measurementTokens) {
        try {
            moduleMeasurementData = requestModuleMeasurements(assay, measurementTokens, samples, remoteUser)
        } catch (e) {
            // keep the export going: show an error column and report the message separately
            moduleMeasurementData = ['error' : ['Module error, module not available or unknown assay'] * samples.size() ]
            moduleError = e.message
        }
    }

    [   'Subject Data'              : getFieldValues(samples, fieldMap['Subject Data'], 'parentSubject'),
        'Sampling Event Data'       : getFieldValues(samples, fieldMap['Sampling Event Data'], 'parentEvent'),
        'Sample Data'               : getFieldValues(samples, fieldMap['Sample Data']),
        'Event Group'               : eventFieldMap,
        'Module Measurement Data'   : moduleMeasurementData,
        'Module Error'              : moduleError
    ]
}
---|
213 | |
---|
/**
 * Puts a 'Study Data' category in front of a column wise data structure.
 * The category holds one column for every field of the assay's parent
 * that has a (Groovy-true) value, with that value repeated numValues times.
 *
 * @param inputData  column wise data structure of samples
 * @param assay      assay whose parent supplies the field values; when not
 *                   given, inputData is returned unchanged
 * @param numValues  number of times each value is repeated (number of rows)
 * @return the extended column wise data structure
 */
def prependStudyData( inputData, Assay assay, numValues ) {
    if( assay ) {
        def studyColumns = [:]

        // one column per field of the parent that actually has a value
        assay.parent?.giveFields().each { templateField ->
            def fieldValue = assay.parent.getFieldValue( templateField.name )
            if( fieldValue ) {
                studyColumns[ templateField.name ] = [fieldValue] * numValues
            }
        }

        return [ 'Study Data': studyColumns ] + inputData
    }

    return inputData
}
---|
237 | |
---|
/**
 * Puts an 'Assay Data' category in front of a column wise data structure.
 * The category holds one column for every field of the assay that has a
 * (Groovy-true) value, with that value repeated numValues times.
 *
 * @param inputData  column wise data structure of samples
 * @param assay      assay that supplies the field values; when not given,
 *                   inputData is returned unchanged
 * @param numValues  number of times each value is repeated (number of rows)
 * @return the extended column wise data structure
 */
def prependAssayData( inputData, Assay assay, numValues ) {
    if( assay ) {
        def assayColumns = [:]

        // one column per assay field that actually has a value
        assay.giveFields().each { templateField ->
            def fieldValue = assay.getFieldValue( templateField.name )
            if( fieldValue ) {
                assayColumns[ templateField.name ] = [fieldValue] * numValues
            }
        }

        return [ 'Assay Data': assayColumns ] + inputData
    }

    return inputData
}
---|
261 | |
---|
/**
 * Retrieves the measurement names of an assay from its module through a
 * rest call (POST to [module url]/rest/getMeasurements/query with the
 * assay token as query).
 *
 * @param assay       assay to retrieve the measurement names for; its module
 *                    (url and name) and giveUUID() are used
 * @param remoteUser  user that is passed on to the module communication service (optional)
 * @return list with the measurement names as strings; JSON null entries are
 *         returned as empty strings
 * @throws Exception  when the module call fails. The original failure is
 *                    attached as the cause.
 */
def requestModuleMeasurementNames(assay, SecUser remoteUser = null) {

    def moduleUrl = assay.module.url

    def path = moduleUrl + "/rest/getMeasurements/query"
    def query = "assayToken=${assay.giveUUID()}"
    def jsonArray

    try {
        jsonArray = moduleCommunicationService.callModuleMethod(moduleUrl, path, query, "POST", remoteUser)
    } catch (e) {
        // Pass the underlying exception along as cause so the real reason is
        // not lost. The continuation lines are flush left on purpose: leading
        // whitespace would become part of the message.
        throw new Exception("An error occured while trying to get the measurement tokens from the $assay.module.name. \
This means the module containing the measurement data is not available right now. Please try again \
later or notify the system administrator if the problem persists. URL: $path?$query.", e)
    }

    // JSON null values cannot be shown as a name: replace them by an empty string
    return jsonArray.collect { it == JSONObject.NULL ? "" : it.toString() }
}
---|
294 | |
---|
/**
 * Retrieves module measurement data through a rest call to the module
 * (POST to [module url]/rest/getMeasurementData/query).
 *
 * The module answers with three lists: sample tokens, measurement tokens
 * and a flat list of values in which the value for measurement m and
 * sample token s is found at index (m * number of sample tokens + s).
 *
 * @param assay                   assay for which the module measurements should be retrieved
 * @param inputMeasurementTokens  list with the names of the fields to be retrieved.
 *                                Format: [ 'measurementName1', 'measurementName2' ]
 * @param samples                 samples to collect measurements for
 * @param remoteUser              user that is passed on to the module communication service (optional)
 * @return map with the measurement name as key and a list of values as
 *         value, e.g. [ "measurement 1": [ value1, value2, value3 ] ]. The
 *         values are in the same order as the given samples; samples without
 *         data get a null value. An empty map is returned when the module
 *         returns no sample tokens.
 * @throws Exception  when the module call fails. The original failure is
 *                    attached as the cause.
 */
def requestModuleMeasurements(assay, inputMeasurementTokens, samples, SecUser remoteUser = null) {

    def moduleUrl = assay.module.url
    def path = moduleUrl + "/rest/getMeasurementData/query"

    def tokenString = ''
    inputMeasurementTokens.each {
        tokenString += "&measurementToken=${it.encodeAsURL()}"
    }

    // NOTE(review): requestModuleMeasurementNames identifies the assay with
    // giveUUID(), whereas assayUUID is read directly here - confirm that both
    // always yield the same token.
    def query = "assayToken=$assay.assayUUID$tokenString"

    def sampleTokens = [], measurementTokens = [], moduleData = []

    try {
        (sampleTokens, measurementTokens, moduleData) = moduleCommunicationService.callModuleMethod(moduleUrl, path, query, "POST", remoteUser)
    } catch (e) {
        // Pass the underlying exception along as cause so the real reason is
        // not lost. The continuation lines are flush left on purpose: leading
        // whitespace would become part of the message.
        throw new Exception("An error occured while trying to get the measurement data from the $assay.module.name. \
This means the module containing the measurement data is not available right now. Please try again \
later or notify the system administrator if the problem persists. URL: $path?$query.", e)
    }

    // No data at all: return an empty map, consistent with the regular return value
    if (!sampleTokens?.size()) return [:]

    def numSampleTokens = sampleTokens.size()

    // Index the sample tokens once, by their string representation. The first
    // occurrence wins, as with indexOf. Doing the lookup and the membership
    // test on the same (string) key avoids a mismatch for tokens that are not
    // plain Strings, and avoids rebuilding the token list for every sample.
    def tokenIndexByUUID = [:]
    sampleTokens.eachWithIndex { token, index ->
        def key = token.toString()
        if( !tokenIndexByUUID.containsKey( key ) )
            tokenIndexByUUID[ key ] = index
    }

    // The sample UUIDs do not change between measurements: look them up once
    def sampleTokenIndices = samples.collect { sample -> tokenIndexByUUID.get( sample.giveUUID() ) }

    // Convert the three lists into a map like:
    //
    // [ "measurement 1": [ value1, value2, value3 ],
    //   "measurement 2": [ value4, value5, value6 ] ]
    //
    // The returned values are in the same order as the given samples-list
    def map = [:]

    measurementTokens.eachWithIndex { measurementToken, measurementIndex ->
        def measurements = []

        sampleTokenIndices.each { tokenIndex ->

            // No measurements for this sample: return a null value
            if( tokenIndex == null ) {
                measurements << null
                return
            }

            def valueIndex = measurementIndex * numSampleTokens + tokenIndex

            // If the module data is in the wrong format, show an error in the log file
            // and return a null value for this measurement.
            if( valueIndex >= moduleData.size() ) {
                log.error "Module measurements given by module " + assay.module.name + " are not in the right format: " + measurementTokens?.size() + " measurements, " + sampleTokens?.size() + " samples, " + moduleData?.size() + " values"
                measurements << null
                return
            }

            def measurement = moduleData[ valueIndex ]
            def val

            if (measurement == null || measurement == JSONObject.NULL) {
                val = ""
            } else if (measurement instanceof Number) {
                val = measurement
            } else {
                // numeric text is returned as a number, everything else as text
                def text = measurement.toString()
                val = text.isDouble() ? text.toDouble() : text
            }

            measurements << val
        }

        map[ measurementToken.toString() ] = measurements
    }

    return map
}
---|
374 | |
---|
/**
 * Merges the column wise data of multiple assays into one column wise
 * structure by appending the rows of each assay. The format for each assay is
 *
 * [Category1:
 *   [Column1: [1,2,3], Column2: [4,5,6]],
 *  Category2:
 *   [Column3: [7,8,9], Column4: [10,11,12], Column5: [13,14,15]]]
 *
 * where the category describes the kind of data (e.g. subject, sample) and
 * each column lists one value per entity. All columns of one assay are
 * expected to have the same length; that length is taken from the first
 * non-empty column that is found.
 *
 * Example: the input (2 assays)
 *
 * [
 *  [Category1: [Column1: [1,2,3], Column2: [4,5,6]]],
 *  [Category1: [Column1: [16,17], Column6: [18,19]],
 *   Category3: [Column3: [20,21]]]
 * ]
 *
 * results in (5 entries per column, empty strings where an assay does not
 * have the column)
 *
 * [Category1: [Column1: [1,2,3,16,17], Column2: [4,5,6,"",""], Column6: ["","","",18,19]],
 *  Category3: [Column3: ["","","",20,21]]]
 *
 * @param columnWiseAssayData  list with each entry being the column wise data of an assay
 * @return combined assay data, in the same structure as each input entry.
 *         Missing values are given as an empty string.
 */
def mergeColumnWiseDataOfMultipleStudies(def columnWiseAssayData) {
    // Number of rows per assay: the size of the first non-empty column. It
    // determines how many empty values are added for columns an assay lacks.
    def rowCounts = []
    columnWiseAssayData.each { assayColumns ->
        def rowCount = 0

        for( categoryEntry in assayColumns ) {
            if( !categoryEntry ) continue

            for( columnEntry in categoryEntry.value ) {
                def columnLength = columnEntry?.value?.size()
                if( columnLength > 0 ) {
                    rowCount = columnLength
                    break
                }
            }

            if( rowCount ) break
        }

        rowCounts << rowCount
    }

    def merged = [:]

    // Walk through the union of all categories found in the assays
    def allCategories = columnWiseAssayData*.keySet().toList().flatten().unique()
    for( category in allCategories ) {
        // The data of this category for each assay (null if the assay lacks it)
        def perAssay = columnWiseAssayData*.getAt( category )

        // Union of the column names over all assays
        def columnNames = perAssay.findAll{ it }*.keySet().toList().flatten().unique()

        merged[ category ] = columnNames.inject( [:] ) { columns, columnName ->
            def values = []

            perAssay.eachWithIndex { assayValues, assayIndex ->
                if( assayValues && assayValues.containsKey( columnName ) ) {
                    // The assay has this column: append its values
                    values += assayValues[ columnName ]
                } else {
                    // Otherwise pad with an empty string for each of its rows
                    values += [""] * rowCounts[ assayIndex ]
                }
            }

            columns[ columnName ] = values
            columns
        }
    }

    return merged
}
---|
467 | |
---|
/**
 * Merges the column wise data of multiple assays (see
 * mergeColumnWiseDataOfMultipleStudies for the format) and subsequently
 * combines all rows that belong to the same sample into one row.
 *
 * Rows are matched on the value of the 'id' column in the 'Sample Data'
 * category. When two rows are combined, the value of the first row is kept
 * unless it is null or an empty string, in which case the value of the
 * later row is used.
 *
 * @param columnWiseAssayData  list with each entry being the column wise data of an assay.
 *                             The data MUST have a category named 'Sample Data' and in
 *                             that map a field named 'id'.
 * @param removeIdColumn       if set to true (default), the 'id' column is removed from
 *                             the 'Sample Data' category of the output
 * @return combined assay data, in the same structure as each input entry,
 *         with one row per sample id. Missing values are given as an empty string.
 */
def mergeColumnWiseDataOfMultipleStudiesForASetOfSamples(def columnWiseAssayData, boolean removeIdColumn = true ) {
    // Start with all rows of all assays appended to each other
    def mergedData = mergeColumnWiseDataOfMultipleStudies( columnWiseAssayData )

    // sample id -> index of the first row with that id
    def firstRowForId = [:]

    // row index -> index of the row it should end up in
    def targetRowForRow = [:]

    def sampleIds = mergedData[ "Sample Data" ][ "id" ]
    sampleIds.size().times { rowIndex ->
        def sampleId = sampleIds[ rowIndex ]

        if( firstRowForId[ sampleId ] == null )
            firstRowForId[ sampleId ] = rowIndex

        targetRowForRow[ rowIndex ] = firstRowForId[ sampleId ]
    }

    /*
     * Example:
     *   firstRowForId:   [ 12: 0, 24: 1, 26: 3 ]
     *   targetRowForRow: [ 0: 0, 1: 1, 2: 0, 3: 3, 4: 3 ]
     * (rows 0, 1 and 3 remain, row 2 is merged into row 0, row 4 into row 3)
     *
     * A target row never comes after the row itself, so handling the rows
     * from last to first allows removing a merged row without disturbing
     * the indices of the rows that still have to be handled.
     */
    targetRowForRow.keySet().toList().reverse().each { rowIndex ->
        def targetIndex = targetRowForRow[ rowIndex ]

        // Rows that are their own target stay where they are
        if( rowIndex == targetIndex ) return

        mergedData.each { categoryEntry ->
            categoryEntry.value.each { columnEntry ->
                def columnValues = columnEntry.value

                // Fill the target only if it is empty; otherwise both values
                // should be the same (e.g. study, subject, sample data)
                def kept = columnValues[ targetIndex ]
                if( kept == null || kept == "" )
                    columnValues[ targetIndex ] = columnValues[ rowIndex ]

                // Drop the merged row from this column
                columnValues.remove( rowIndex )
            }
        }
    }

    // Remove sample id if required
    if( removeIdColumn )
        mergedData[ "Sample Data" ].remove( "id" )

    return mergedData
}
---|
575 | |
---|
/**
 * Converts column wise data into row wise data with two header rows.
 *
 * @param columnData multidimensional map containing column data.
 * On the top level, the data must be grouped by category. Each key is the
 * category title and the values are maps representing the columns. Each
 * column has a title (its key) and an ArrayList of values. Columns must be
 * equally sized.
 *
 * For example, the map
 * [Category1:
 *   [Column1: [1,2,3], Column2: [4,5,6]],
 *  Category2:
 *   [Column3: [7,8,9], Column4: [10,11,12], Column5: [13,14,15]]]
 *
 * is converted into the rows
 *
 * | Category1 | (null)  | Category2 | (null)  | (null)  |
 * | Column1   | Column2 | Column3   | Column4 | Column5 |
 * | 1         | 4       | 7         | 10      | 13      |
 * | 2         | 5       | 8         | 11      | 14      |
 * | 3         | 6       | 9         | 12      | 15      |
 *
 * @return row wise data (list of lists), or an empty list if any column
 *         is not an ArrayList
 */
def convertColumnToRowStructure(columnData) {

    // every column of every category must be a list of values
    def wellFormed = columnData.every { category ->
        category.value.every { column -> column.value instanceof ArrayList }
    }
    if (!wellFormed) return []

    def categoryRow = []
    def columnRow = []
    def columns = []

    columnData.each { category ->

        def columnCount = category.value.size()

        if (columnCount) {
            // the category title goes above its first column; the remaining
            // columns of the category get a null value in the first row
            categoryRow << category.key
            (columnCount - 1).times { categoryRow << null }

            // column titles go into the second row
            columnRow.addAll( category.value.collect { it.key } )
        }

        // gather the value lists of all columns, in header order
        category.value.each { columns << it.value }
    }

    // transpose the columns into rows and put the header rows in front
    [categoryRow, columnRow] + columns.transpose()
}
---|
632 | |
---|
/**
 * Writes column wise data as an Excel file to a stream. The data is first
 * converted to rows (see convertColumnToRowStructure) and then exported
 * with exportRowWiseDataToExcelFile.
 *
 * @param columnData        multidimensional map containing column data
 * @param outputStream      stream to write to
 * @param useOfficeOpenXML  flag to specify xlsx (default) or xls output
 * @throws Exception        ('Wrong column data format.') when the column
 *                          data cannot be converted to rows
 */
def exportColumnWiseDataToExcelFile(columnData, outputStream, useOfficeOpenXML = true) {

    // a row based structure is easier to write
    def rowWise = convertColumnToRowStructure(columnData)

    // an empty result means the input was not in the expected format
    if (!rowWise)
        throw new Exception('Wrong column data format.')

    exportRowWiseDataToExcelFile(rowWise, outputStream, useOfficeOpenXML)
}
---|
657 | |
---|
/**
 * Export row wise data in Excel format (one sheet) to a stream. The stream
 * is closed afterwards, also when writing fails.
 *
 * @param rowData           list of lists containing for each row all cell values
 * @param outputStream      stream to write to
 * @param useOfficeOpenXML  flag to specify xlsx (default) or xls output
 */
def exportRowWiseDataToExcelFile(rowData, outputStream, useOfficeOpenXML = true) {
    try {
        Workbook wb = useOfficeOpenXML ? new XSSFWorkbook() : new HSSFWorkbook()
        Sheet sheet = wb.createSheet()

        exportRowWiseDataToExcelSheet( rowData, sheet )

        wb.write(outputStream)
    } finally {
        // always release the stream, even if building or writing the workbook failed
        outputStream.close()
    }
}
---|
675 | |
---|
/**
 * Export row wise data as delimiter separated text to a stream. Rows are
 * separated by a newline. The stream is closed afterwards, also when
 * writing fails.
 *
 * Numbers are formatted with the given locale, without grouping
 * separators. Other values are written as text. A value is surrounded with
 * double quotes (" ") only when needed: when it contains a double quote
 * (which is escaped by doubling it), a comma, a line break or the output
 * delimiter. Null values are written as empty text.
 *
 * @param rowData          list of lists containing for each row all cell values
 * @param outputStream     stream to write to
 * @param outputDelimiter  text that separates the values within a row (default: tab)
 * @param locale           locale used for formatting numbers (default: US)
 */
def exportRowWiseDataToCSVFile(rowData, outputStream, outputDelimiter = '\t', locale = java.util.Locale.US) {

    try {
        def formatter = NumberFormat.getNumberInstance(locale)
        formatter.setGroupingUsed false // we don't want grouping (thousands) separators
        // NOTE(review): the formatter's default maximum number of fraction
        // digits applies here - confirm that the resulting rounding is acceptable.

        def delimiter = outputDelimiter?.toString() ?: ''
        def containsDelimiter = { String text -> delimiter && text.contains(delimiter) }

        def lines = rowData.collect { row ->
            row.collect { cellValue ->

                // numeric values are formatted using the chosen locale; quotes are
                // only needed if the formatted number contains the delimiter
                // (e.g. a decimal comma in a comma separated file)
                if (cellValue instanceof Number) {
                    def number = formatter.format(cellValue)
                    return containsDelimiter(number) ? "\"$number\"" : number
                }

                def s = cellValue?.toString() ?: ''

                // quotes are required whenever the value could be mistaken for
                // structure: quotes, comma's, line breaks or the delimiter itself
                def addQuotes = s.contains('"') || s.contains(',') || s.contains('\n') || s.contains('\r') || containsDelimiter(s)

                // escape double quotes with double quotes
                if (s.contains('"')) s = s.replaceAll('"', '""')

                addQuotes ? "\"$s\"" : s

            }.join(outputDelimiter)
        }

        outputStream << lines.join('\n')
    } finally {
        // always release the stream, even if formatting or writing failed
        outputStream.close()
    }
}
---|
716 | |
---|
/**
 * Export row wise data for multiple assays in Excel format to a stream,
 * with a separate sheet for each assay. The stream is closed afterwards,
 * also when writing fails.
 *
 * @param assayData         list of structures with row wise data, one for each assay
 * @param outputStream      stream to write to
 * @param useOfficeOpenXML  flag to specify xlsx (default) or xls output
 */
def exportRowWiseDataForMultipleAssaysToExcelFile(assayData, outputStream, useOfficeOpenXML = true) {
    try {
        Workbook wb = useOfficeOpenXML ? new XSSFWorkbook() : new HSSFWorkbook()

        // every assay gets its own sheet
        assayData.each { rowData ->
            Sheet sheet = wb.createSheet()

            exportRowWiseDataToExcelSheet( rowData, sheet )
        }

        wb.write(outputStream)
    } finally {
        // always release the stream, even if building or writing the workbook failed
        outputStream.close()
    }
}
---|
737 | |
---|
/**
 * Export row wise data in Excel format to a given sheet in an excel workbook.
 *
 * A row is created for every entry in rowData; rows without data are left
 * without cells. Null values are written as an empty text cell. Numbers,
 * Strings and Dates are handed to the cell as they are; all other values
 * are written as their string representation.
 *
 * @param rowData  list of lists containing for each row all cell values
 * @param sheet    Excel sheet to write the rows to
 */
def exportRowWiseDataToExcelSheet(rowData, Sheet sheet) {
    // create all rows
    rowData.size().times { sheet.createRow it }

    sheet.eachWithIndex { Row row, ri ->
        if( rowData[ ri ] ) {
            // create appropriate number of cells for this row
            rowData[ri].size().times { row.createCell it }

            row.eachWithIndex { Cell cell, ci ->

                def value = rowData[ri][ci]

                if( value == null ) {
                    // only a missing value results in an empty cell. Testing the
                    // value with Groovy truth would also blank out 0 and 0.0.
                    cell.setCellValue('')
                } else {
                    // Numbers and values that are exactly a String or a Date can be
                    // written as is, other types need converting to String.
                    // Booleans are written as text: values are boxed at runtime, so
                    // they never matched the former boolean.class check either.
                    def writeAsIs = (value instanceof Number) || (value.class in [String.class, Date.class])

                    cell.setCellValue( writeAsIs ? value : value.toString() )
                }
            }
        }
    }
}
---|
769 | } |
---|