[147] | 1 | /** |
---|
| 2 | * Importer service |
---|
| 3 | * |
---|
| 4 | * The importer service handles the import of tabular, comma delimited and Excel format |
---|
| 5 | * based files. |
---|
| 6 | * |
---|
[1417] | 7 | * @package importer |
---|
| 8 | * @author t.w.abma@umcutrecht.nl |
---|
| 9 | * @since 20100126 |
---|
[147] | 10 | * |
---|
| 11 | * Revision information: |
---|
| 12 | * $Rev: 1677 $ |
---|
| 13 | * $Author: robert@isdat.nl $ |
---|
| 14 | * $Date: 2011-03-28 10:35:23 +0000 (ma, 28 mrt 2011) $ |
---|
| 15 | */ |
---|
| 16 | package dbnp.importer |
---|
[1591] | 17 | |
---|
[1457] | 18 | import org.dbnp.gdt.* |
---|
[1087] | 19 | import org.apache.poi.ss.usermodel.* |
---|
[1426] | 20 | import dbnp.studycapturing.* |
---|
[147] | 21 | |
---|
| 22 | class ImporterService { |
---|
[1421] | 23 | def authenticationService |
---|
[147] | 24 | |
---|
[1608] | 25 | static transactional = false |
---|
[147] | 26 | |
---|
[1417] | 27 | /** |
---|
| 28 | * @param is input stream representing the (workbook) resource |
---|
| 29 | * @return high level representation of the workbook |
---|
| 30 | */ |
---|
| 31 | Workbook getWorkbook(InputStream is) { |
---|
| 32 | WorkbookFactory.create(is) |
---|
| 33 | } |
---|
[147] | 34 | |
---|
[1417] | 35 | /** |
---|
| 36 | * @param wb high level representation of the workbook |
---|
| 37 | * @param sheetindex sheet to use within the workbook |
---|
| 38 | * @return header representation as a MappingColumn hashmap |
---|
| 39 | */ |
---|
| 40 | def getHeader(Workbook wb, int sheetindex, int headerrow, int datamatrix_start, theEntity = null) { |
---|
| 41 | def sheet = wb.getSheetAt(sheetindex) |
---|
| 42 | def sheetrow = sheet.getRow(datamatrix_start) |
---|
| 43 | //def header = [] |
---|
[1515] | 44 | def header = [] |
---|
[1417] | 45 | def df = new DataFormatter() |
---|
| 46 | def property = new String() |
---|
[169] | 47 | |
---|
[1417] | 48 | //for (Cell c: sheet.getRow(datamatrix_start)) { |
---|
[147] | 49 | |
---|
[1417] | 50 | (0..sheetrow.getLastCellNum() - 1).each { columnindex -> |
---|
[655] | 51 | |
---|
[1417] | 52 | //def index = c.getColumnIndex() |
---|
| 53 | def datamatrix_celltype = sheet.getRow(datamatrix_start).getCell(columnindex, Row.CREATE_NULL_AS_BLANK).getCellType() |
---|
| 54 | def datamatrix_celldata = df.formatCellValue(sheet.getRow(datamatrix_start).getCell(columnindex)) |
---|
| 55 | def datamatrix_cell = sheet.getRow(datamatrix_start).getCell(columnindex) |
---|
| 56 | def headercell = sheet.getRow(headerrow - 1 + sheet.getFirstRowNum()).getCell(columnindex) |
---|
| 57 | def tft = TemplateFieldType.STRING //default templatefield type |
---|
[634] | 58 | |
---|
[1417] | 59 | // Check for every celltype, currently redundant code, but possibly this will be |
---|
| 60 | // a piece of custom code for every cell type like specific formatting |
---|
[534] | 61 | |
---|
[1417] | 62 | switch (datamatrix_celltype) { |
---|
| 63 | case Cell.CELL_TYPE_STRING: |
---|
[1553] | 64 | //parse cell value as double |
---|
[1417] | 65 | def doubleBoolean = true |
---|
| 66 | def fieldtype = TemplateFieldType.STRING |
---|
[534] | 67 | |
---|
[1553] | 68 | // is this string perhaps a double? |
---|
[1417] | 69 | try { |
---|
| 70 | formatValue(datamatrix_celldata, TemplateFieldType.DOUBLE) |
---|
[1609] | 71 | } catch (NumberFormatException nfe) { |
---|
| 72 | doubleBoolean = false |
---|
| 73 | } |
---|
[1417] | 74 | finally { |
---|
| 75 | if (doubleBoolean) fieldtype = TemplateFieldType.DOUBLE |
---|
| 76 | } |
---|
[545] | 77 | |
---|
[1417] | 78 | header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell), |
---|
[1553] | 79 | templatefieldtype: fieldtype, |
---|
| 80 | index: columnindex, |
---|
| 81 | entityclass: theEntity, |
---|
| 82 | property: property); |
---|
[634] | 83 | |
---|
[1417] | 84 | break |
---|
| 85 | case Cell.CELL_TYPE_NUMERIC: |
---|
| 86 | def fieldtype = TemplateFieldType.LONG |
---|
| 87 | def doubleBoolean = true |
---|
| 88 | def longBoolean = true |
---|
[634] | 89 | |
---|
[1553] | 90 | // is this cell really an integer? |
---|
[1417] | 91 | try { |
---|
| 92 | Long.valueOf(datamatrix_celldata) |
---|
[1609] | 93 | } catch (NumberFormatException nfe) { |
---|
| 94 | longBoolean = false |
---|
| 95 | } |
---|
[1417] | 96 | finally { |
---|
| 97 | if (longBoolean) fieldtype = TemplateFieldType.LONG |
---|
| 98 | } |
---|
[634] | 99 | |
---|
[1553] | 100 | // it's not an long, perhaps a double? |
---|
[1417] | 101 | if (!longBoolean) |
---|
| 102 | try { |
---|
| 103 | formatValue(datamatrix_celldata, TemplateFieldType.DOUBLE) |
---|
[1609] | 104 | } catch (NumberFormatException nfe) { |
---|
| 105 | doubleBoolean = false |
---|
| 106 | } |
---|
[1417] | 107 | finally { |
---|
| 108 | if (doubleBoolean) fieldtype = TemplateFieldType.DOUBLE |
---|
| 109 | } |
---|
[706] | 110 | |
---|
[1417] | 111 | if (DateUtil.isCellDateFormatted(datamatrix_cell)) fieldtype = TemplateFieldType.DATE |
---|
[147] | 112 | |
---|
[1417] | 113 | header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell), |
---|
[1553] | 114 | templatefieldtype: fieldtype, |
---|
| 115 | index: columnindex, |
---|
| 116 | entityclass: theEntity, |
---|
| 117 | property: property); |
---|
[1417] | 118 | break |
---|
| 119 | case Cell.CELL_TYPE_BLANK: |
---|
| 120 | header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell), |
---|
[1553] | 121 | templatefieldtype: TemplateFieldType.STRING, |
---|
| 122 | index: columnindex, |
---|
| 123 | entityclass: theEntity, |
---|
| 124 | property: property); |
---|
[1417] | 125 | break |
---|
| 126 | default: |
---|
| 127 | header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell), |
---|
[1553] | 128 | templatefieldtype: TemplateFieldType.STRING, |
---|
| 129 | index: columnindex, |
---|
| 130 | entityclass: theEntity, |
---|
| 131 | property: property); |
---|
[1417] | 132 | break |
---|
| 133 | } // end of switch |
---|
| 134 | } // end of cell loop |
---|
| 135 | return header |
---|
| 136 | } |
---|
[169] | 137 | |
---|
[1417] | 138 | /** |
---|
| 139 | * This method is meant to return a matrix of the rows and columns |
---|
| 140 | * used in the preview |
---|
| 141 | * |
---|
| 142 | * @param wb workbook object |
---|
| 143 | * @param sheetindex sheet index used |
---|
| 144 | * @param rows amount of rows returned |
---|
| 145 | * @return two dimensional array (matrix) of Cell objects |
---|
| 146 | */ |
---|
| 147 | Object[][] getDatamatrix(Workbook wb, header, int sheetindex, int datamatrix_start, int count) { |
---|
| 148 | def sheet = wb.getSheetAt(sheetindex) |
---|
| 149 | def rows = [] |
---|
| 150 | def df = new DataFormatter() |
---|
[169] | 151 | |
---|
[1417] | 152 | count = (count < sheet.getLastRowNum()) ? count : sheet.getLastRowNum() |
---|
[1122] | 153 | |
---|
[1417] | 154 | // walk through all rows |
---|
| 155 | ((datamatrix_start + sheet.getFirstRowNum())..count).each { rowindex -> |
---|
| 156 | def row = [] |
---|
[169] | 157 | |
---|
[1417] | 158 | (0..header.size() - 1).each { columnindex -> |
---|
[1553] | 159 | if (sheet.getRow(rowindex)) |
---|
| 160 | row.add( sheet.getRow(rowindex).getCell(columnindex, Row.CREATE_NULL_AS_BLANK) ) |
---|
[1417] | 161 | } |
---|
| 162 | |
---|
| 163 | rows.add(row) |
---|
| 164 | } |
---|
| 165 | |
---|
| 166 | return rows |
---|
[1122] | 167 | } |
---|
[169] | 168 | |
---|
[1417] | 169 | /** |
---|
| 170 | * This method will move a file to a new location. |
---|
| 171 | * |
---|
| 172 | * @param file File object to move |
---|
| 173 | * @param folderpath folder to move the file to |
---|
| 174 | * @param filename (new) filename to give |
---|
| 175 | * @return if file has been moved succesful, the new path and filename will be returned, otherwise an empty string will be returned |
---|
| 176 | */ |
---|
| 177 | def moveFile(File file, String folderpath, String filename) { |
---|
| 178 | try { |
---|
| 179 | def rnd = ""; //System.currentTimeMillis() |
---|
| 180 | file.transferTo(new File(folderpath, rnd + filename)) |
---|
| 181 | return folderpath + filename |
---|
| 182 | } catch (Exception exception) { |
---|
| 183 | log.error "File move error, ${exception}" |
---|
| 184 | return "" |
---|
| 185 | } |
---|
| 186 | } |
---|
[169] | 187 | |
---|
[1417] | 188 | /** |
---|
| 189 | * @return random numeric value |
---|
| 190 | */ |
---|
| 191 | def random = { |
---|
| 192 | return System.currentTimeMillis() + Runtime.runtime.freeMemory() |
---|
| 193 | } |
---|
| 194 | |
---|
[1609] | 195 | |
---|
[1417] | 196 | /** |
---|
[1591] | 197 | * Retrieves records with sample, subject, samplingevent etc. from a study |
---|
| 198 | * @param s Study to retrieve records from |
---|
| 199 | * @return A list with hashmaps [ 'objects': [ 'Sample': .., 'Subject': .., 'SamplingEvent': .., 'Event': '.. ], 'templates': [], 'templateCombination': .. ] |
---|
| 200 | */ |
---|
| 201 | protected def getRecords( Study s ) { |
---|
| 202 | def records = []; |
---|
[1609] | 203 | |
---|
[1591] | 204 | s.samples?.each { |
---|
| 205 | def record = [ 'objects': retrieveEntitiesBySample( it ) ]; |
---|
[1609] | 206 | |
---|
[1591] | 207 | def templates = [:] |
---|
| 208 | def templateCombination = []; |
---|
| 209 | record.objects.each { entity -> |
---|
| 210 | templates[ entity.key ] = entity.value?.template |
---|
| 211 | if( entity.value?.template ) |
---|
| 212 | templateCombination << entity.key + ": " + entity.value?.template?.name; |
---|
| 213 | } |
---|
[1609] | 214 | |
---|
[1591] | 215 | record.templates = templates; |
---|
| 216 | record.templateCombination = templateCombination.join( ', ' ) |
---|
[1609] | 217 | |
---|
[1591] | 218 | records << record |
---|
| 219 | } |
---|
[1609] | 220 | |
---|
[1591] | 221 | return records; |
---|
| 222 | } |
---|
[1609] | 223 | |
---|
[1591] | 224 | /** |
---|
| 225 | * Returns a subject, event and samplingEvent that belong to this sample |
---|
| 226 | * @param s Sample to find the information for |
---|
| 227 | * @return |
---|
| 228 | */ |
---|
| 229 | protected retrieveEntitiesBySample( Sample s ) { |
---|
| 230 | return [ |
---|
| 231 | 'Sample': s, |
---|
| 232 | 'Subject': s?.parentSubject, |
---|
| 233 | 'SamplingEvent': s?.parentEvent, |
---|
[1610] | 234 | 'Event': s?.parentEventGroup?.events?.toList()?.getAt(0) |
---|
[1591] | 235 | ] |
---|
| 236 | } |
---|
[1609] | 237 | |
---|
[1591] | 238 | /** |
---|
| 239 | * Imports data from a workbook into a list of ImportRecords. If some entities are already in the database, |
---|
| 240 | * these records are updated. |
---|
| 241 | * |
---|
| 242 | * This method is capable of importing Subject, Samples, SamplingEvents and Events |
---|
| 243 | * |
---|
| 244 | * @param templates Map of templates, identified by their entity as a key. For example: [ Subject: Template x, Sample: Template y ] |
---|
| 245 | * @param wb Excel workbook to import |
---|
| 246 | * @param sheetindex Number of the sheet to import data from |
---|
| 247 | * @param rowindex Row to start importing from. |
---|
| 248 | * @param mcmap Hashmap of mappingcolumns, with the first entry in the hashmap containing information about the first column, etc. |
---|
| 249 | * @param parent Study to import all data into. Is used for determining which sample/event/subject/assay to update |
---|
| 250 | * @param createAllEntities If set to true, the system will also create objects for entities that have no data imported, but do have |
---|
| 251 | * a template assigned |
---|
| 252 | * @return List List with two entries: |
---|
| 253 | * 0 List with ImportRecords, one for each row in the excelsheet |
---|
| 254 | * 1 List with ImportCell objects, mentioning the cells that could not be correctly imported |
---|
| 255 | * (because the value in the excelsheet can't be entered into the template field) |
---|
| 256 | */ |
---|
| 257 | def importOrUpdateDataBySampleIdentifier( def templates, Workbook wb, int sheetindex, int rowindex, def mcmap, Study parent = null, boolean createAllEntities = true ) { |
---|
| 258 | if( !mcmap ) |
---|
| 259 | return; |
---|
[1609] | 260 | |
---|
[1591] | 261 | // Check whether the rows should be imported in one or more entities |
---|
| 262 | def entities |
---|
| 263 | if( createAllEntities ) { |
---|
| 264 | entities = templates.entrySet().value.findAll { it }.entity; |
---|
| 265 | } else { |
---|
| 266 | entities = mcmap.findAll{ !it.dontimport }.entityclass.unique(); |
---|
| 267 | } |
---|
[1609] | 268 | |
---|
[1591] | 269 | def sheet = wb.getSheetAt(sheetindex) |
---|
| 270 | def table = [] |
---|
| 271 | def failedcells = [] // list of cells that have failed to import |
---|
[1677] | 272 | |
---|
[1591] | 273 | // First check for each record whether an entity in the database should be updated, |
---|
| 274 | // or a new entity should be added. This is done before any new object is created, since |
---|
| 275 | // searching after new objects have been created (but not yet saved) will result in |
---|
| 276 | // org.hibernate.AssertionFailure: collection [...] was not processed by flush() |
---|
| 277 | // errors |
---|
| 278 | def existingEntities = [:] |
---|
| 279 | for( int i = rowindex; i <= sheet.getLastRowNum(); i++ ) { |
---|
| 280 | existingEntities[i] = findExistingEntities( entities, sheet.getRow(i), mcmap, parent ); |
---|
| 281 | } |
---|
[1609] | 282 | |
---|
[1591] | 283 | // walk through all rows and fill the table with records |
---|
| 284 | for( int i = rowindex; i <= sheet.getLastRowNum(); i++ ) { |
---|
[1611] | 285 | def row = sheet.getRow(i); |
---|
| 286 | |
---|
| 287 | if( row && !rowIsEmpty( row ) ) { |
---|
| 288 | // Create an entity record based on a row read from Excel and store the cells which failed to be mapped |
---|
| 289 | def (record, failed) = importOrUpdateRecord( templates, entities, row, mcmap, parent, table, existingEntities[i] ); |
---|
| 290 | |
---|
| 291 | // Setup the relationships between the imported entities |
---|
| 292 | relateEntities( record ); |
---|
| 293 | |
---|
| 294 | // Add record with entities and its values to the table |
---|
| 295 | table.add(record) |
---|
| 296 | |
---|
| 297 | // If failed cells have been found, add them to the failed cells list |
---|
| 298 | if (failed?.importcells?.size() > 0) failedcells.add(failed) |
---|
| 299 | } |
---|
[1591] | 300 | } |
---|
[1609] | 301 | |
---|
[1591] | 302 | return [ "table": table, "failedCells": failedcells ] |
---|
| 303 | } |
---|
[1611] | 304 | |
---|
| 305 | /** |
---|
| 306 | * Checks whether an excel row is empty |
---|
| 307 | * @param row Row from the excel sheet |
---|
| 308 | * @return True if all cells in this row are empty or the given row is null. False otherwise |
---|
| 309 | */ |
---|
| 310 | def rowIsEmpty( Row excelRow ) { |
---|
| 311 | if( !excelRow ) |
---|
| 312 | return true; |
---|
| 313 | |
---|
| 314 | def df = new DataFormatter(); |
---|
| 315 | for( int i = excelRow.getFirstCellNum(); i < excelRow.getLastCellNum(); i++ ) { |
---|
| 316 | Cell cell = excelRow.getCell( i ); |
---|
| 317 | |
---|
| 318 | try { |
---|
| 319 | def value = df.formatCellValue(cell) |
---|
| 320 | if( value ) |
---|
| 321 | return false |
---|
| 322 | } catch (NumberFormatException nfe) { |
---|
| 323 | // If the number can't be formatted, the row isn't empty |
---|
| 324 | return false; |
---|
| 325 | } |
---|
| 326 | } |
---|
| 327 | |
---|
| 328 | return true; |
---|
| 329 | } |
---|
[1591] | 330 | |
---|
| 331 | /** |
---|
[1609] | 332 | * Checks whether entities in the given row already exist in the database |
---|
| 333 | * they are updated. |
---|
| 334 | * |
---|
| 335 | * @param entities Entities that have to be imported for this row |
---|
| 336 | * @param excelRow Excel row to import into this record |
---|
| 337 | * @param mcmap Hashmap of mappingcolumns, with the first entry in the hashmap containing information about the first column, etc. |
---|
| 338 | * @return Map Map with entities that have been found for this row. The key for the entities is the entity name (e.g.: [Sample: null, Subject: <subject object>] |
---|
| 339 | */ |
---|
| 340 | def findExistingEntities(def entities, Row excelRow, mcmap, parent ) { |
---|
| 341 | DataFormatter df = new DataFormatter(); |
---|
[1591] | 342 | |
---|
[1609] | 343 | // Find entities based on sample identifier |
---|
| 344 | def sample = findEntityByRow( dbnp.studycapturing.Sample, excelRow, mcmap, parent, [], df ); |
---|
| 345 | return retrieveEntitiesBySample( sample ); |
---|
| 346 | } |
---|
| 347 | |
---|
[1591] | 348 | /** |
---|
| 349 | * Imports a records from the excelsheet into the database. If the entities are already in the database |
---|
| 350 | * they are updated. |
---|
| 351 | * |
---|
| 352 | * This method is capable of importing Subject, Samples, SamplingEvents and Events |
---|
| 353 | * |
---|
| 354 | * @param templates Map of templates, identified by their entity as a key. For example: [ Sample: Template y ] |
---|
| 355 | * @param entities Entities that have to be imported for this row |
---|
| 356 | * @param excelRow Excel row to import into this record |
---|
| 357 | * @param mcmap Hashmap of mappingcolumns, with the first entry in the hashmap containing information about the first column, etc. |
---|
| 358 | * @param parent Study to import all data into. Is used for determining which sample/event/subject/assay to update |
---|
| 359 | * @param importedRows Rows that have been imported before this row. These rows might contain the same entities as are |
---|
| 360 | * imported in this row. These entities should be used again, to avoid importing duplicates. |
---|
| 361 | * @return List List with two entries: |
---|
| 362 | * 0 List with ImportRecords, one for each row in the excelsheet |
---|
| 363 | * 1 List with ImportCell objects, mentioning the cells that could not be correctly imported |
---|
| 364 | * (because the value in the excelsheet can't be entered into the template field) |
---|
| 365 | */ |
---|
| 366 | def importOrUpdateRecord(def templates, def entities, Row excelRow, mcmap, Study parent = null, List importedRows, Map existingEntities ) { |
---|
| 367 | DataFormatter df = new DataFormatter(); |
---|
| 368 | def record = [] // list of entities and the read values |
---|
| 369 | def failed = new ImportRecord() // map with entity identifier and failed mappingcolumn |
---|
[1609] | 370 | |
---|
[1591] | 371 | // Check whether this record mentions a sample that has been imported before. In that case, |
---|
| 372 | // we update that record, in order to prevent importing the same sample multiple times |
---|
| 373 | def importedEntities = []; |
---|
| 374 | if( importedRows ) |
---|
| 375 | importedEntities = importedRows.flatten().findAll { it.class == dbnp.studycapturing.Sample }.unique(); |
---|
| 376 | |
---|
[1611] | 377 | def importedSample = findEntityInImportedEntities( dbnp.studycapturing.Sample, excelRow, mcmap, importedEntities, df ) |
---|
| 378 | def imported = retrieveEntitiesBySample( importedSample ); |
---|
| 379 | |
---|
[1591] | 380 | for( entity in entities ) { |
---|
| 381 | // Check whether this entity should be added or updated |
---|
| 382 | // The entity is updated is an entity with the same 'identifier' (field |
---|
| 383 | // specified to be the identifying field) is found in the database |
---|
| 384 | def entityName = entity.name[ entity.name.lastIndexOf( '.' ) + 1..-1]; |
---|
| 385 | def template = templates[ entityName ]; |
---|
[1609] | 386 | |
---|
[1591] | 387 | // If no template is specified for this entity, continue with the next |
---|
| 388 | if( !template ) |
---|
| 389 | continue; |
---|
[1609] | 390 | |
---|
[1591] | 391 | // Check whether the object exists in the list of already imported entities |
---|
| 392 | def entityObject = imported[ entityName ] |
---|
[1609] | 393 | |
---|
[1591] | 394 | // If it doesn't, search for the entity in the database |
---|
| 395 | if( !entityObject && existingEntities ) |
---|
| 396 | entityObject = existingEntities[ entityName ]; |
---|
[1609] | 397 | |
---|
[1591] | 398 | // Otherwise, create a new object |
---|
| 399 | if( !entityObject ) |
---|
| 400 | entityObject = entity.newInstance(); |
---|
[1609] | 401 | |
---|
[1591] | 402 | // Update the template |
---|
| 403 | entityObject.template = template; |
---|
[1609] | 404 | |
---|
[1591] | 405 | // Go through the Excel row cell by cell |
---|
[1611] | 406 | for( int i = excelRow.getFirstCellNum(); i < excelRow.getLastCellNum(); i++ ) { |
---|
| 407 | Cell cell = excelRow.getCell( i ); |
---|
| 408 | |
---|
[1621] | 409 | if( !cell ) |
---|
| 410 | continue; |
---|
| 411 | |
---|
[1591] | 412 | // get the MappingColumn information of the current cell |
---|
| 413 | def mc = mcmap[cell.getColumnIndex()] |
---|
| 414 | def value |
---|
[1609] | 415 | |
---|
[1591] | 416 | // Check if column must be imported |
---|
| 417 | if (mc != null && !mc.dontimport && mc.entityclass == entity) { |
---|
| 418 | try { |
---|
[1612] | 419 | if( cell.getCellType() == Cell.CELL_TYPE_NUMERIC && DateUtil.isCellDateFormatted(cell) ) { |
---|
| 420 | // The format for date template fields is dd/mm/yyyy |
---|
| 421 | def date = cell.getDateCellValue(); |
---|
| 422 | value = date.format( "dd/MM/yyyy" ) |
---|
| 423 | } else { |
---|
| 424 | value = formatValue(df.formatCellValue(cell), mc.templatefieldtype) |
---|
| 425 | } |
---|
[1591] | 426 | } catch (NumberFormatException nfe) { |
---|
| 427 | value = "" |
---|
| 428 | } |
---|
[1609] | 429 | |
---|
[1591] | 430 | try { |
---|
| 431 | entityObject.setFieldValue(mc.property, value) |
---|
| 432 | } catch (Exception iae) { |
---|
| 433 | log.error ".import wizard error could not set property `" + mc.property + "` to value `" + value + "`" |
---|
| 434 | |
---|
| 435 | // store the mapping column and value which failed |
---|
| 436 | def identifier = entityName.toLowerCase() + "_" + entityObject.getIdentifier() + "_" + mc.property |
---|
[1609] | 437 | |
---|
[1591] | 438 | def mcInstance = new MappingColumn() |
---|
| 439 | mcInstance.properties = mc.properties |
---|
| 440 | failed.addToImportcells(new ImportCell(mappingcolumn: mcInstance, value: value, entityidentifier: identifier)) |
---|
| 441 | } |
---|
| 442 | } // end if |
---|
| 443 | } // end for |
---|
[1609] | 444 | |
---|
[1591] | 445 | // If a Study is entered, use it as a 'parent' for other entities |
---|
| 446 | if( entity == Study ) |
---|
| 447 | parent = entityObject; |
---|
[1609] | 448 | |
---|
[1591] | 449 | record << entityObject; |
---|
| 450 | } |
---|
[1609] | 451 | |
---|
[1591] | 452 | // a failed column means that using the entity.setFieldValue() threw an exception |
---|
| 453 | return [record, failed] |
---|
| 454 | } |
---|
[1609] | 455 | |
---|
[1591] | 456 | /** |
---|
| 457 | * Looks into the database to find an object of the given entity that should be updated, given the excel row. |
---|
| 458 | * This is done by looking at the 'preferredIdentifier' field of the object. If it exists in the row, and the |
---|
| 459 | * value is already in the database for that field, an existing object is returned. Otherwise, null is returned |
---|
| 460 | * |
---|
| 461 | * @param entity Entity to search |
---|
| 462 | * @param excelRow Excelrow to search for |
---|
| 463 | * @param mcmap Map with MappingColumns |
---|
| 464 | * @param parent Parent study for the entity (if applicable). The returned entity will also have this parent |
---|
| 465 | * @param importedRows List of entities that have been imported before. The function will first look through this list to find |
---|
| 466 | * a matching entity. |
---|
| 467 | * @return An entity that has the same identifier as entered in the excelRow. The entity is first sought in the importedRows. If it |
---|
| 468 | * is not found there, the database is queried. If no entity is found at all, null is returned. |
---|
| 469 | */ |
---|
| 470 | def findEntityByRow( Class entity, Row excelRow, def mcmap, Study parent = null, List importedEntities = [], DataFormatter df = null ) { |
---|
[1611] | 471 | if( !excelRow ) |
---|
| 472 | return |
---|
| 473 | |
---|
[1591] | 474 | if( df == null ) |
---|
| 475 | df = new DataFormatter(); |
---|
[1609] | 476 | |
---|
[1591] | 477 | def identifierField = givePreferredIdentifier( entity ); |
---|
[1609] | 478 | |
---|
[1591] | 479 | if( identifierField ) { |
---|
| 480 | // Check whether the identifierField is chosen in the column matching |
---|
| 481 | def identifierColumn = mcmap.find { it.entityclass == entity && it.property == identifierField.name }; |
---|
[1609] | 482 | |
---|
[1591] | 483 | // If it is, find the identifier and look it up in the database |
---|
| 484 | if( identifierColumn ) { |
---|
| 485 | def identifierCell = excelRow.getCell( identifierColumn.index ); |
---|
| 486 | def identifier; |
---|
| 487 | try { |
---|
| 488 | identifier = formatValue(df.formatCellValue(identifierCell), identifierColumn.templatefieldtype) |
---|
| 489 | } catch (NumberFormatException nfe) { |
---|
| 490 | identifier = null |
---|
| 491 | } |
---|
[1609] | 492 | |
---|
[1591] | 493 | // Search for an existing object with the same identifier. |
---|
| 494 | if( identifier ) { |
---|
| 495 | // First search the already imported rows |
---|
| 496 | if( importedEntities ) { |
---|
| 497 | def imported = importedEntities.find { it.getFieldValue( identifierField.name ) == identifier }; |
---|
| 498 | if( imported ) |
---|
| 499 | return imported; |
---|
| 500 | } |
---|
[1609] | 501 | |
---|
[1591] | 502 | def c = entity.createCriteria(); |
---|
[1609] | 503 | |
---|
[1591] | 504 | // If the entity has a field 'parent', the search should be limited to |
---|
| 505 | // objects with the same parent. The method entity.hasProperty( "parent" ) doesn't |
---|
| 506 | // work, since the java.lang.Class entity doesn't know of the parent property. |
---|
| 507 | if( entity.belongsTo?.containsKey( "parent" ) ) { |
---|
| 508 | // If the entity requires a parent, but none is given, no |
---|
| 509 | // results are given from the database. This prevents the user |
---|
[1609] | 510 | // of changing data in another study |
---|
[1591] | 511 | if( parent && parent.id ) { |
---|
| 512 | return c.get { |
---|
| 513 | eq( identifierField.name, identifier ) |
---|
| 514 | eq( "parent", parent ) |
---|
| 515 | } |
---|
| 516 | } |
---|
| 517 | } else { |
---|
| 518 | return c.get { |
---|
| 519 | eq( identifierField.name, identifier ) |
---|
| 520 | } |
---|
| 521 | } |
---|
| 522 | } |
---|
| 523 | } |
---|
| 524 | } |
---|
[1609] | 525 | |
---|
[1591] | 526 | // No object is found |
---|
| 527 | return null; |
---|
| 528 | } |
---|
[1609] | 529 | |
---|
[1591] | 530 | /** |
---|
[1609] | 531 | * Looks into the list of already imported entities to find an object of the given entity that should be |
---|
| 532 | * updated, given the excel row. This is done by looking at the 'preferredIdentifier' field of the object. |
---|
| 533 | * If it exists in the row, and the list of imported entities contains an object with the same |
---|
| 534 | * identifier, the existing object is returned. Otherwise, null is returned |
---|
| 535 | * |
---|
| 536 | * @param entity Entity to search |
---|
| 537 | * @param excelRow Excelrow to search for |
---|
| 538 | * @param mcmap Map with MappingColumns |
---|
| 539 | * @param importedRows List of entities that have been imported before. The function will first look through this list to find |
---|
| 540 | * a matching entity. |
---|
| 541 | * @return An entity that has the same identifier as entered in the excelRow. The entity is first sought in the importedRows. If it |
---|
| 542 | * is not found there, the database is queried. If no entity is found at all, null is returned. |
---|
| 543 | */ |
---|
| 544 | def findEntityInImportedEntities( Class entity, Row excelRow, def mcmap, List importedEntities = [], DataFormatter df = null ) { |
---|
| 545 | if( df == null ) |
---|
| 546 | df = new DataFormatter(); |
---|
| 547 | |
---|
| 548 | def allFields = entity.giveDomainFields(); |
---|
| 549 | def identifierField = allFields.find { it.preferredIdentifier } |
---|
| 550 | |
---|
| 551 | if( identifierField ) { |
---|
| 552 | // Check whether the identifierField is chosen in the column matching |
---|
| 553 | def identifierColumn = mcmap.find { it.entityclass == entity && it.property == identifierField.name }; |
---|
| 554 | |
---|
| 555 | // If it is, find the identifier and look it up in the database |
---|
| 556 | if( identifierColumn ) { |
---|
| 557 | def identifierCell = excelRow.getCell( identifierColumn.index ); |
---|
| 558 | def identifier; |
---|
| 559 | try { |
---|
| 560 | identifier = formatValue(df.formatCellValue(identifierCell), identifierColumn.templatefieldtype) |
---|
| 561 | } catch (NumberFormatException nfe) { |
---|
| 562 | identifier = null |
---|
| 563 | } |
---|
| 564 | |
---|
| 565 | // Search for an existing object with the same identifier. |
---|
| 566 | if( identifier ) { |
---|
[1591] | 567 | // First search the already imported rows |
---|
| 568 | if( importedEntities ) { |
---|
| 569 | def imported = importedEntities.find { |
---|
| 570 | def fieldValue = it.getFieldValue( identifierField.name ) |
---|
| 571 | |
---|
| 572 | if( fieldValue instanceof String ) |
---|
| 573 | return fieldValue.toLowerCase() == identifier.toLowerCase(); |
---|
| 574 | else |
---|
| 575 | return fieldValue == identifier |
---|
| 576 | }; |
---|
[1609] | 577 | if( imported ) |
---|
| 578 | return imported; |
---|
| 579 | } |
---|
| 580 | } |
---|
| 581 | } |
---|
| 582 | } |
---|
[1591] | 583 | |
---|
[1609] | 584 | // No object is found |
---|
| 585 | return null; |
---|
| 586 | } |
---|
| 587 | |
---|
| 588 | |
---|
[1591] | 589 | /** |
---|
| 590 | * Creates relation between multiple entities that have been imported. The entities are |
---|
| 591 | * all created from one row in the excel sheet. |
---|
| 592 | */ |
---|
| 593 | def relateEntities( List entities) { |
---|
| 594 | def study = entities.find { it instanceof Study } |
---|
| 595 | def subject = entities.find { it instanceof Subject } |
---|
| 596 | def sample = entities.find { it instanceof Sample } |
---|
| 597 | def event = entities.find { it instanceof Event } |
---|
| 598 | def samplingEvent = entities.find { it instanceof SamplingEvent } |
---|
| 599 | def assay = entities.find { it instanceof Assay } |
---|
[1609] | 600 | |
---|
[1591] | 601 | // A study object is found in the entity list |
---|
| 602 | if( study ) { |
---|
| 603 | if( subject ) { |
---|
| 604 | subject.parent = study; |
---|
| 605 | study.addToSubjects( subject ); |
---|
| 606 | } |
---|
| 607 | if( sample ) { |
---|
| 608 | sample.parent = study |
---|
| 609 | study.addToSamples( sample ); |
---|
| 610 | } |
---|
| 611 | if( event ) { |
---|
[1609] | 612 | event.parent = study |
---|
[1591] | 613 | study.addToEvents( event ); |
---|
| 614 | } |
---|
| 615 | if( samplingEvent ) { |
---|
| 616 | samplingEvent.parent = study |
---|
| 617 | study.addToSamplingEvents( samplingEvent ); |
---|
| 618 | } |
---|
| 619 | if( assay ) { |
---|
| 620 | assay.parent = study; |
---|
| 621 | study.addToAssays( assay ); |
---|
| 622 | } |
---|
| 623 | } |
---|
| 624 | |
---|
| 625 | if( sample ) { |
---|
| 626 | if( subject ) sample.parentSubject = subject |
---|
| 627 | if( samplingEvent ) sample.parentEvent = samplingEvent; |
---|
| 628 | if( event ) { |
---|
| 629 | def evGroup = new EventGroup(); |
---|
[1677] | 630 | |
---|
| 631 | sample.parentEventGroup = evGroup; |
---|
[1591] | 632 | evGroup.addToEvents( event ); |
---|
[1677] | 633 | |
---|
[1591] | 634 | if( subject ) evGroup.addToSubjects( subject ); |
---|
| 635 | if( samplingEvent ) evGroup.addToSamplingEvents( samplingEvent ); |
---|
[1609] | 636 | |
---|
[1591] | 637 | } |
---|
[1609] | 638 | |
---|
[1591] | 639 | if( assay ) assay.addToSamples( sample ); |
---|
| 640 | } |
---|
| 641 | } |
---|
| 642 | |
---|
| 643 | /** |
---|
[1417] | 644 | * Method to read data from a workbook and to import data into a two dimensional |
---|
| 645 | * array |
---|
| 646 | * |
---|
| 647 | * @param template_id template identifier to use fields from |
---|
| 648 | * @param wb POI horrible spreadsheet formatted workbook object |
---|
| 649 | * @param mcmap linked hashmap (preserved order) of MappingColumns |
---|
| 650 | * @param sheetindex sheet to use when using multiple sheets |
---|
| 651 | * @param rowindex first row to start with reading the actual data (NOT the header) |
---|
| 652 | * @return two dimensional array containing records (with entities) |
---|
| 653 | * |
---|
| 654 | * @see dbnp.importer.MappingColumn |
---|
| 655 | */ |
---|
| 656 | def importData(template_id, Workbook wb, int sheetindex, int rowindex, mcmap) { |
---|
| 657 | def sheet = wb.getSheetAt(sheetindex) |
---|
| 658 | def template = Template.get(template_id) |
---|
| 659 | def table = [] |
---|
| 660 | def failedcells = [] // list of records |
---|
| 661 | // walk through all rows and fill the table with records |
---|
| 662 | (rowindex..sheet.getLastRowNum()).each { i -> |
---|
| 663 | // Create an entity record based on a row read from Excel and store the cells which failed to be mapped |
---|
| 664 | def (record, failed) = createRecord(template, sheet.getRow(i), mcmap) |
---|
| 665 | |
---|
| 666 | // Add record with entity and its values to the table |
---|
| 667 | table.add(record) |
---|
| 668 | |
---|
| 669 | // If failed cells have been found, add them to the failed cells list |
---|
| 670 | if (failed?.importcells?.size() > 0) failedcells.add(failed) |
---|
[147] | 671 | } |
---|
| 672 | |
---|
[1417] | 673 | return [table, failedcells] |
---|
[147] | 674 | } |
---|
[231] | 675 | |
---|
[1553] | 676 | /** |
---|
| 677 | * Removes a cell from the failedCells list, based on the entity and field. If the entity and field didn't fail before |
---|
| 678 | * the method doesn't do anything. |
---|
| 679 | * |
---|
| 680 | * @param failedcell list of cells that have failed previously |
---|
| 681 | * @param entity entity to remove from the failedcells list |
---|
| 682 | * @param field field to remove the failed cell for. If no field is given, all cells for this entity will be removed |
---|
| 683 | * @return List Updated list of cells that have failed |
---|
| 684 | */ |
---|
| 685 | def removeFailedCell(failedcells, entity, field = null ) { |
---|
| 686 | if( !entity ) |
---|
| 687 | return failedcells; |
---|
| 688 | |
---|
| 689 | def filterClosure |
---|
| 690 | if( field ) { |
---|
| 691 | def entityIdField = "entity_" + entity.getIdentifier() + "_" + field.name.toLowerCase() |
---|
| 692 | filterClosure = { cell -> cell.entityidentifier != entityIdField } |
---|
| 693 | } else { |
---|
| 694 | def entityIdField = "entity_" + entity.getIdentifier() + "_" |
---|
| 695 | filterClosure = { cell -> !cell.entityidentifier.startsWith( entityIdField ) } |
---|
| 696 | } |
---|
| 697 | |
---|
| 698 | failedcells.each { record -> |
---|
| 699 | record.importcells = record.importcells.findAll( filterClosure ) |
---|
| 700 | } |
---|
| 701 | |
---|
| 702 | return failedcells; |
---|
| 703 | } |
---|
| 704 | |
---|
| 705 | /** |
---|
| 706 | * Returns the name of an input field as it is used for a specific entity in HTML. |
---|
| 707 | * |
---|
| 708 | * @param entity entity to retrieve the field name for |
---|
| 709 | * @param field field to retrieve the field name for |
---|
| 710 | * @return String Name of the HTML field for the given entity and field. Can also be used in the map |
---|
| 711 | * of request parameters |
---|
| 712 | */ |
---|
| 713 | def getFieldNameInTableEditor(entity, field) { |
---|
[1591] | 714 | def entityName = entity?.class.name[ entity?.class.name.lastIndexOf(".") + 1..-1] |
---|
[1609] | 715 | |
---|
[1553] | 716 | if( field instanceof TemplateField ) |
---|
| 717 | field = field.escapedName(); |
---|
| 718 | |
---|
[1609] | 719 | return entityName.toLowerCase() + "_" + entity.getIdentifier() + "_" + field.toLowerCase() |
---|
[1553] | 720 | } |
---|
| 721 | |
---|
| 722 | /** |
---|
| 723 | * Retrieves a mapping column from a list based on the given fieldname |
---|
| 724 | * @param mappingColumns List of mapping columns |
---|
[1608] | 725 | * @param fieldName Field name to find |
---|
| 726 | * @return Mapping column if a column is found, null otherwise |
---|
[1553] | 727 | */ |
---|
| 728 | def findMappingColumn( mappingColumns, String fieldName ) { |
---|
| 729 | return mappingColumns.find { it.property == fieldName.toLowerCase() } |
---|
| 730 | } |
---|
| 731 | |
---|
[1417] | 732 | /** Method to put failed cells back into the datamatrix. Failed cells are cell values |
---|
| 733 | * which could not be stored in an entity (e.g. Humu Supiuns in an ontology field). |
---|
| 734 | * Empty corrections should not be stored |
---|
| 735 | * |
---|
| 736 | * @param datamatrix two dimensional array containing entities and possibly also failed cells |
---|
| 737 | * @param failedcells list with maps of failed cells in [mappingcolumn, cell] format |
---|
| 738 | * @param correctedcells map of corrected cells in [cellhashcode, value] format |
---|
| 739 | * */ |
---|
| 740 | def saveCorrectedCells(datamatrix, failedcells, correctedcells) { |
---|
[1093] | 741 | |
---|
[1417] | 742 | // Loop through all failed cells (stored as |
---|
| 743 | failedcells.each { record -> |
---|
| 744 | record.value.importcells.each { cell -> |
---|
[1093] | 745 | |
---|
[1417] | 746 | // Get the corrected value |
---|
| 747 | def correctedvalue = correctedcells.find { it.key.toInteger() == cell.getIdentifier()}.value |
---|
| 748 | |
---|
| 749 | // Find the record in the table which the mappingcolumn belongs to |
---|
| 750 | def tablerecord = datamatrix.find { it.hashCode() == record.key } |
---|
| 751 | |
---|
| 752 | // Loop through all entities in the record and correct them if necessary |
---|
| 753 | tablerecord.each { rec -> |
---|
| 754 | rec.each { entity -> |
---|
| 755 | try { |
---|
| 756 | // Update the entity field |
---|
| 757 | entity.setFieldValue(cell.mappingcolumn.property, correctedvalue) |
---|
[1422] | 758 | //log.info "Adjusted " + cell.mappingcolumn.property + " to " + correctedvalue |
---|
[1417] | 759 | } |
---|
| 760 | catch (Exception e) { |
---|
[1422] | 761 | //log.info "Could not map corrected ontology: " + cell.mappingcolumn.property + " to " + correctedvalue |
---|
[1417] | 762 | } |
---|
| 763 | } |
---|
| 764 | } // end of table record |
---|
| 765 | } // end of cell record |
---|
| 766 | } // end of failedlist |
---|
[274] | 767 | } |
---|
[1050] | 768 | |
---|
[1417] | 769 | /** |
---|
| 770 | * Method to store a matrix containing the entities in a record like structure. Every row in the table |
---|
| 771 | * contains one or more entity objects (which contain fields with values). So actually a row represents |
---|
| 772 | * a record with fields from one or more different entities. |
---|
| 773 | * |
---|
| 774 | * @param study entity Study |
---|
| 775 | * @param datamatrix two dimensional array containing entities with values read from Excel file |
---|
| 776 | */ |
---|
[1603] | 777 | static saveDatamatrix(Study study, importerEntityType, datamatrix, authenticationService, log) { |
---|
[1417] | 778 | def validatedSuccesfully = 0 |
---|
| 779 | def entitystored = null |
---|
[1083] | 780 | |
---|
[1417] | 781 | // Study passed? Sync data |
---|
[1603] | 782 | if (study != null && importerEntityType != 'Study') study.refresh() |
---|
[1103] | 783 | |
---|
[1417] | 784 | // go through the data matrix, read every record and validate the entity and try to persist it |
---|
| 785 | datamatrix.each { record -> |
---|
| 786 | record.each { entity -> |
---|
| 787 | switch (entity.getClass()) { |
---|
[1469] | 788 | case Study: log.info ".importer wizard, persisting Study `" + entity + "`: " |
---|
[1421] | 789 | entity.owner = authenticationService.getLoggedInUser() |
---|
[1083] | 790 | |
---|
[1603] | 791 | if (entity.validate()) { |
---|
[1553] | 792 | if (!entity.save(flush:true)) { |
---|
| 793 | log.error ".importer wizard, study could not be saved: " + entity |
---|
| 794 | throw new Exception('.importer wizard, study could not be saved: ' + entity) |
---|
| 795 | } |
---|
| 796 | } else { |
---|
| 797 | log.error ".importer wizard, study could not be validated: " + entity |
---|
| 798 | throw new Exception('.importer wizard, study could not be validated: ' + entity) |
---|
| 799 | } |
---|
| 800 | |
---|
| 801 | break |
---|
[1469] | 802 | case Subject: log.info ".importer wizard, persisting Subject `" + entity + "`: " |
---|
| 803 | |
---|
[1553] | 804 | // is the current entity not already in the database? |
---|
| 805 | //entitystored = isEntityStored(entity) |
---|
[1103] | 806 | |
---|
[1553] | 807 | // this entity is new, so add it to the study |
---|
| 808 | //if (entitystored==null) |
---|
[1411] | 809 | |
---|
[1417] | 810 | study.addToSubjects(entity) |
---|
[1277] | 811 | |
---|
[1417] | 812 | break |
---|
[1469] | 813 | case Event: log.info ".importer wizard, persisting Event `" + entity + "`: " |
---|
[1417] | 814 | study.addToEvents(entity) |
---|
| 815 | break |
---|
[1469] | 816 | case Sample: log.info ".importer wizard, persisting Sample `" + entity + "`: " |
---|
[1411] | 817 | |
---|
[1553] | 818 | // is this sample validatable (sample name unique for example?) |
---|
[1417] | 819 | study.addToSamples(entity) |
---|
[1411] | 820 | |
---|
[1417] | 821 | break |
---|
[1469] | 822 | case SamplingEvent: log.info ".importer wizard, persisting SamplingEvent `" + entity + "`: " |
---|
[1417] | 823 | study.addToSamplingEvents(entity) |
---|
| 824 | break |
---|
[1469] | 825 | default: log.info ".importer wizard, skipping persisting of `" + entity.getclass() + "`" |
---|
[1417] | 826 | break |
---|
| 827 | } // end switch |
---|
| 828 | } // end record |
---|
| 829 | } // end datamatrix |
---|
[284] | 830 | |
---|
[1417] | 831 | // validate study |
---|
[1603] | 832 | if (importerEntityType != 'Study') { |
---|
| 833 | if (study.validate()) { |
---|
| 834 | if (!study.save(flush: true)) { |
---|
| 835 | //this.appendErrors(flow.study, flash.wizardErrors) |
---|
| 836 | throw new Exception('.importer wizard [saveDatamatrix] error while saving study') |
---|
| 837 | } |
---|
| 838 | } else { |
---|
| 839 | throw new Exception('.importer wizard [saveDatamatrix] study does not validate') |
---|
[1417] | 840 | } |
---|
| 841 | } |
---|
[1002] | 842 | |
---|
[1417] | 843 | //persistEntity(study) |
---|
[1002] | 844 | |
---|
[1417] | 845 | //return [validatedSuccesfully, updatedentities, failedtopersist] |
---|
| 846 | //return [0,0,0] |
---|
| 847 | return true |
---|
| 848 | } |
---|
[1411] | 849 | |
---|
[1417] | 850 | /** |
---|
| 851 | * Check whether an entity already exist. A unique field in the entity is |
---|
| 852 | * used to check whether the instantiated entity (read from Excel) is new. |
---|
| 853 | * If the entity is found in the database it will be returned as is. |
---|
| 854 | * |
---|
| 855 | * @param entity entity object like a Study, Subject, Sample et cetera |
---|
| 856 | * @return entity if found, otherwise null |
---|
| 857 | */ |
---|
| 858 | def isEntityStored(entity) { |
---|
| 859 | switch (entity.getClass()) { |
---|
| 860 | case Study: return Study.findByCode(entity.code) |
---|
| 861 | break |
---|
| 862 | case Subject: return Subject.findByParentAndName(entity.parent, entity.name) |
---|
| 863 | break |
---|
| 864 | case Event: break |
---|
| 865 | case Sample: |
---|
| 866 | break |
---|
| 867 | case SamplingEvent: break |
---|
| 868 | default: // unknown entity |
---|
| 869 | return null |
---|
| 870 | } |
---|
| 871 | } |
---|
[1411] | 872 | |
---|
[1417] | 873 | /** |
---|
| 874 | * Find the entity and update the fields. The entity is an instance |
---|
| 875 | * read from Excel. This method looks in the database for the entity |
---|
| 876 | * having the same identifier. If it has found the same entity |
---|
| 877 | * already in the database, it will update the record. |
---|
| 878 | * |
---|
| 879 | * @param entitystored existing record in the database to update |
---|
| 880 | * @param entity entity read from Excel |
---|
| 881 | */ |
---|
| 882 | def updateEntity(entitystored, entity) { |
---|
| 883 | switch (entity.getClass()) { |
---|
| 884 | case Study: break |
---|
| 885 | case Subject: entitystored.properties = entity.properties |
---|
| 886 | entitystored.save() |
---|
| 887 | break |
---|
| 888 | case Event: break |
---|
| 889 | case Sample: break |
---|
| 890 | case SamplingEvent: break |
---|
| 891 | default: // unknown entity |
---|
| 892 | return null |
---|
| 893 | } |
---|
| 894 | } |
---|
[1411] | 895 | |
---|
[1417] | 896 | /** |
---|
| 897 | * Method to persist entities into the database |
---|
| 898 | * Checks whether entity already exists (based on identifier column 'name') |
---|
| 899 | * |
---|
| 900 | * @param entity entity object like Study, Subject, Protocol et cetera |
---|
| 901 | * |
---|
| 902 | */ |
---|
| 903 | boolean persistEntity(entity) { |
---|
[1469] | 904 | /*log.info ".import wizard persisting ${entity}" |
---|
[1553] | 905 | try { |
---|
| 906 | entity.save(flush: true) |
---|
| 907 | return true |
---|
| 908 | } catch (Exception e) { |
---|
| 909 | def session = sessionFactory.currentSession |
---|
| 910 | session.setFlushMode(org.hibernate.FlushMode.MANUAL) |
---|
| 911 | log.error ".import wizard, failed to save entity:\n" + org.apache.commons.lang.exception.ExceptionUtils.getRootCauseMessage(e) |
---|
| 912 | } |
---|
| 913 | return true*/ |
---|
| 914 | //println "persistEntity" |
---|
[1417] | 915 | } |
---|
| 916 | |
---|
[725] | 917 | /** |
---|
| 918 | * This method creates a record (array) containing entities with values |
---|
| 919 | * |
---|
| 920 | * @param template_id template identifier |
---|
| 921 | * @param excelrow POI based Excel row containing the cells |
---|
| 922 | * @param mcmap map containing MappingColumn objects |
---|
[1417] | 923 | * @return list of entities and list of failed cells |
---|
[725] | 924 | */ |
---|
[1411] | 925 | def createRecord(template, Row excelrow, mcmap) { |
---|
[1417] | 926 | def df = new DataFormatter() |
---|
| 927 | def tft = TemplateFieldType |
---|
[1093] | 928 | def record = [] // list of entities and the read values |
---|
[1417] | 929 | def failed = new ImportRecord() // map with entity identifier and failed mappingcolumn |
---|
[273] | 930 | |
---|
[735] | 931 | // Initialize all possible entities with the chosen template |
---|
[725] | 932 | def study = new Study(template: template) |
---|
| 933 | def subject = new Subject(template: template) |
---|
| 934 | def samplingEvent = new SamplingEvent(template: template) |
---|
| 935 | def event = new Event(template: template) |
---|
| 936 | def sample = new Sample(template: template) |
---|
[273] | 937 | |
---|
[735] | 938 | // Go through the Excel row cell by cell |
---|
[1087] | 939 | for (Cell cell: excelrow) { |
---|
[735] | 940 | // get the MappingColumn information of the current cell |
---|
[725] | 941 | def mc = mcmap[cell.getColumnIndex()] |
---|
[1417] | 942 | def value |
---|
[666] | 943 | |
---|
[725] | 944 | // Check if column must be imported |
---|
[1417] | 945 | if (mc != null) if (!mc.dontimport) { |
---|
[725] | 946 | try { |
---|
| 947 | value = formatValue(df.formatCellValue(cell), mc.templatefieldtype) |
---|
| 948 | } catch (NumberFormatException nfe) { |
---|
| 949 | value = "" |
---|
| 950 | } |
---|
[274] | 951 | |
---|
[1553] | 952 | try { |
---|
[1417] | 953 | // which entity does the current cell (field) belong to? |
---|
[1527] | 954 | switch (mc.entityclass) { |
---|
[1417] | 955 | case Study: // does the entity already exist in the record? If not make it so. |
---|
[1527] | 956 | (record.any {it.getClass() == mc.entityclass}) ? 0 : record.add(study) |
---|
[1417] | 957 | study.setFieldValue(mc.property, value) |
---|
| 958 | break |
---|
[1527] | 959 | case Subject: (record.any {it.getClass() == mc.entityclass}) ? 0 : record.add(subject) |
---|
[1417] | 960 | subject.setFieldValue(mc.property, value) |
---|
| 961 | break |
---|
[1527] | 962 | case SamplingEvent: (record.any {it.getClass() == mc.entityclass}) ? 0 : record.add(samplingEvent) |
---|
[1417] | 963 | samplingEvent.setFieldValue(mc.property, value) |
---|
| 964 | break |
---|
[1527] | 965 | case Event: (record.any {it.getClass() == mc.entityclass}) ? 0 : record.add(event) |
---|
[1417] | 966 | event.setFieldValue(mc.property, value) |
---|
| 967 | break |
---|
[1527] | 968 | case Sample: (record.any {it.getClass() == mc.entityclass}) ? 0 : record.add(sample) |
---|
[1417] | 969 | sample.setFieldValue(mc.property, value) |
---|
| 970 | break |
---|
| 971 | case Object: // don't import |
---|
| 972 | break |
---|
| 973 | } // end switch |
---|
| 974 | } catch (Exception iae) { |
---|
| 975 | log.error ".import wizard error could not set property `" + mc.property + "` to value `" + value + "`" |
---|
| 976 | // store the mapping column and value which failed |
---|
[1553] | 977 | def identifier |
---|
[1609] | 978 | def fieldName = mc.property?.toLowerCase() |
---|
| 979 | |
---|
[1527] | 980 | switch (mc.entityclass) { |
---|
[1609] | 981 | case Study: identifier = "entity_" + study.getIdentifier() + "_" + fieldName |
---|
[1417] | 982 | break |
---|
[1609] | 983 | case Subject: identifier = "entity_" + subject.getIdentifier() + "_" + fieldName |
---|
[1417] | 984 | break |
---|
[1609] | 985 | case SamplingEvent: identifier = "entity_" + samplingEvent.getIdentifier() + "_" + fieldName |
---|
[1417] | 986 | break |
---|
[1609] | 987 | case Event: identifier = "entity_" + event.getIdentifier() + "_" + fieldName |
---|
[1417] | 988 | break |
---|
[1609] | 989 | case Sample: identifier = "entity_" + sample.getIdentifier() + "_" + fieldName |
---|
[1417] | 990 | break |
---|
| 991 | case Object: // don't import |
---|
| 992 | break |
---|
| 993 | } |
---|
| 994 | |
---|
| 995 | def mcInstance = new MappingColumn() |
---|
| 996 | mcInstance.properties = mc.properties |
---|
| 997 | failed.addToImportcells(new ImportCell(mappingcolumn: mcInstance, value: value, entityidentifier: identifier)) |
---|
| 998 | } |
---|
[725] | 999 | } // end |
---|
| 1000 | } // end for |
---|
[1417] | 1001 | // a failed column means that using the entity.setFieldValue() threw an exception |
---|
| 1002 | return [record, failed] |
---|
| 1003 | } |
---|
[297] | 1004 | |
---|
[1417] | 1005 | /** |
---|
| 1006 | * Method to parse a value conform a specific type |
---|
| 1007 | * @param value string containing the value |
---|
| 1008 | * @return object corresponding to the TemplateFieldType |
---|
| 1009 | */ |
---|
| 1010 | def formatValue(String value, TemplateFieldType type) throws NumberFormatException { |
---|
| 1011 | switch (type) { |
---|
| 1012 | case TemplateFieldType.STRING: return value.trim() |
---|
| 1013 | case TemplateFieldType.TEXT: return value.trim() |
---|
| 1014 | case TemplateFieldType.LONG: return (long) Double.valueOf(value) |
---|
[1553] | 1015 | //case TemplateFieldType.FLOAT : return Float.valueOf(value.replace(",",".")); |
---|
[1417] | 1016 | case TemplateFieldType.DOUBLE: return Double.valueOf(value.replace(",", ".")); |
---|
| 1017 | case TemplateFieldType.STRINGLIST: return value.trim() |
---|
| 1018 | case TemplateFieldType.ONTOLOGYTERM: return value.trim() |
---|
| 1019 | case TemplateFieldType.DATE: return value |
---|
| 1020 | default: return value |
---|
| 1021 | } |
---|
| 1022 | } |
---|
[1609] | 1023 | |
---|
[1591] | 1024 | /** |
---|
| 1025 | * Returns the preferred identifier field for a given entity or |
---|
| 1026 | * null if no preferred identifier is given |
---|
| 1027 | * @param entity TemplateEntity class |
---|
| 1028 | * @return The preferred identifier field or NULL if no preferred identifier is given |
---|
| 1029 | */ |
---|
| 1030 | public TemplateField givePreferredIdentifier( Class entity ) { |
---|
| 1031 | def allFields = entity.giveDomainFields(); |
---|
| 1032 | return allFields.find { it.preferredIdentifier } |
---|
| 1033 | } |
---|
[684] | 1034 | |
---|
[1417] | 1035 | // classes for fuzzy string matching |
---|
| 1036 | // <FUZZY MATCHING> |
---|
[1126] | 1037 | |
---|
[1417] | 1038 | static def similarity(l_seq, r_seq, degree = 2) { |
---|
| 1039 | def l_histo = countNgramFrequency(l_seq, degree) |
---|
| 1040 | def r_histo = countNgramFrequency(r_seq, degree) |
---|
[1126] | 1041 | |
---|
[1417] | 1042 | dotProduct(l_histo, r_histo) / |
---|
[1553] | 1043 | Math.sqrt(dotProduct(l_histo, l_histo) * |
---|
[1417] | 1044 | dotProduct(r_histo, r_histo)) |
---|
| 1045 | } |
---|
[1126] | 1046 | |
---|
[1417] | 1047 | static def countNgramFrequency(sequence, degree) { |
---|
| 1048 | def histo = [:] |
---|
| 1049 | def items = sequence.size() |
---|
[1126] | 1050 | |
---|
[1417] | 1051 | for (int i = 0; i + degree <= items; i++) { |
---|
| 1052 | def gram = sequence[i..<(i + degree)] |
---|
| 1053 | histo[gram] = 1 + histo.get(gram, 0) |
---|
| 1054 | } |
---|
| 1055 | histo |
---|
| 1056 | } |
---|
[1126] | 1057 | |
---|
[1417] | 1058 | static def dotProduct(l_histo, r_histo) { |
---|
| 1059 | def sum = 0 |
---|
| 1060 | l_histo.each { key, value -> |
---|
| 1061 | sum = sum + l_histo[key] * r_histo.get(key, 0) |
---|
| 1062 | } |
---|
| 1063 | sum |
---|
| 1064 | } |
---|
[1126] | 1065 | |
---|
[1417] | 1066 | static def stringSimilarity(l_str, r_str, degree = 2) { |
---|
[1126] | 1067 | |
---|
[1417] | 1068 | similarity(l_str.toString().toLowerCase().toCharArray(), |
---|
[1553] | 1069 | r_str.toString().toLowerCase().toCharArray(), |
---|
| 1070 | degree) |
---|
[1417] | 1071 | } |
---|
[1126] | 1072 | |
---|
[1417] | 1073 | static def mostSimilar(pattern, candidates, threshold = 0) { |
---|
| 1074 | def topScore = 0 |
---|
| 1075 | def bestFit = null |
---|
[1126] | 1076 | |
---|
[1417] | 1077 | candidates.each { candidate -> |
---|
| 1078 | def score = stringSimilarity(pattern, candidate) |
---|
| 1079 | if (score > topScore) { |
---|
| 1080 | topScore = score |
---|
| 1081 | bestFit = candidate |
---|
| 1082 | } |
---|
| 1083 | } |
---|
[1126] | 1084 | |
---|
[1417] | 1085 | if (topScore < threshold) |
---|
| 1086 | bestFit = null |
---|
| 1087 | |
---|
| 1088 | bestFit |
---|
| 1089 | } |
---|
| 1090 | // </FUZZY MATCHING> |
---|
| 1091 | |
---|
[147] | 1092 | } |
---|