Changeset 1417
- Timestamp: Jan 20, 2011, 4:10:50 PM
- Location: trunk/grails-app
- Files: 2 edited
trunk/grails-app/controllers/dbnp/importer/ImporterController.groovy  (r1416 → r1417)

@@ lines 92-96 @@
     [title: 'Properties'],
     [title: 'Mappings'],
-    [title: 'Imported'],
+    //[title: 'Imported'],
     [title: 'Persist']
 ]

@@ lines 212-216 @@
             error()
         }
-    }.to "pageFour"
+    }.to "save"
     on("previous").to "pageTwo"
 }

@@ lines 248-255 @@
 
     if (saveEntities(flow, params)) {
+        //if (ImporterService.saveDatamatrix(flow.importer_study, flow.importer_importeddata)) {
         log.error ".import wizard succesfully persisted all entities"
-        def session = sessionFactory.getCurrentSession()
-        session.clear()
+        //def session = sessionFactory.getCurrentSession()
+        //session.clear()
         success()
     } else {
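Read together, the hunks above send the mapping step of the import wizard straight into a save step that calls saveEntities(flow, params). Below is a minimal sketch of how such a guarded transition can be wired in the Grails web-flow DSL; the state name pageThree, the mappingsValid() helper and the transitions out of the save state are illustrative assumptions, while "save", "pageTwo", "pageFour" and saveEntities() appear in the changeset itself.

    // Sketch only: pageThree and mappingsValid() are hypothetical names;
    // saveEntities(flow, params) and the "save"/"pageTwo" states come from the changeset.
    def importWizardFlow = {
        pageThree {
            on("next") {
                // advance to the save state only when the column mappings validate
                if (mappingsValid(flow, params)) success() else error()
            }.to "save"
            on("previous").to "pageTwo"
        }

        save {
            action {
                // persist all mapped entities; error() routes back to the mapping page
                if (saveEntities(flow, params)) success() else error()
            }
            on("success").to "pageFour"
            on("error").to "pageThree"
        }
    }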
trunk/grails-app/services/dbnp/importer/ImporterService.groovy  (r1416 → r1417)

The entire ImporterService class body is re-indented in this revision, so the changeset viewer shows the class as removed and re-added in full. The substantive changes are collected in the hunks below; getWorkbook, getHeader, getDatamatrix, moveFile, random, importData, saveCorrectedCells, isEntityStored, updateEntity, formatValue and the fuzzy-matching helpers (similarity, countNgramFrequency, dotProduct, stringSimilarity, mostSimilar) change only in whitespace.

@@ class javadoc @@
  * based files.
  *
- * @package
- * @author
- * @since
+ * @package importer
+ * @author t.w.abma@umcutrecht.nl
+ * @since 20100126
  *
  * Revision information:

@@ imports @@
 package dbnp.importer
 
+import org.apache.poi.ss.usermodel.*
 import org.apache.poi.xssf.usermodel.XSSFCell
 …
 import dbnp.studycapturing.Event
 import dbnp.studycapturing.Sample
+import dbnp.data.Term
+import org.apache.commons.lang.RandomStringUtils
 
 class ImporterService {

@@ saveDatamatrix(): made static, println logging, validate-then-save @@
-    def saveDatamatrix(Study study, datamatrix) {
+    static saveDatamatrix(Study study, datamatrix) {
         def validatedSuccesfully = 0
         def entitystored = null
 
         // Study passed? Sync data
-        if (study!=null) study.refresh()
-
-        //study.subjects.each { it.refresh() }
+        if (study != null) study.refresh()
 
         // go through the data matrix, read every record and validate the entity and try to persist it
         datamatrix.each { record ->
             record.each { entity ->
                 switch (entity.getClass()) {
-                    case Study : log.info "Persisting Study `" + entity + "`: "
+                    case Study: println "Persisting Study `" + entity + "`: "
                         entity.owner = AuthenticationService.getLoggedInUser()
                         persistEntity(entity)
                         break
-                    case Subject : log.info "Persisting Subject `" + entity + "`: "
+                    case Subject: println "Persisting Subject `" + entity + "`: "
 
                         // is the current entity not already in the database?
                         //entitystored = isEntityStored(entity)
 
                         // this entity is new, so add it to the study
                         //if (entitystored==null)
                         study.addToSubjects(entity)
 
-                        /*else { // existing entity, so update it
-                            updateEntity(entitystored, entity)
-                            updatedentities.add(entity)
-                        }*/
-
                         break
-                    case Event : log.info "Persisting Event `" + entity + "`: "
+                    case Event: println "Persisting Event `" + entity + "`: "
                         study.addToEvents(entity)
                         break
-                    case Sample : log.info "Persisting Sample `" + entity +"`: "
+                    case Sample: println "Persisting Sample `" + entity + "`: "
 
                         // is this sample validatable (sample name unique for example?)
                         study.addToSamples(entity)
 
                         break
-                    case SamplingEvent : log.info "Persisting SamplingEvent `" + entity + "`: "
+                    case SamplingEvent: println "Persisting SamplingEvent `" + entity + "`: "
                         study.addToSamplingEvents(entity)
                         break
-                    default : log.info "Skipping persisting of `" + entity.getclass() +"`"
+                    default: println "Skipping persisting of `" + entity.getclass() + "`"
                         break
                 } // end switch
             } // end record
         } // end datamatrix
 
-        if (study!=null) study.save(failOnError:true)
-        //persistEntity(study)
+        // validate study
+        if (study.validate()) {
+            if (!study.save(flush: true)) {
+                //this.appendErrors(flow.study, flash.wizardErrors)
+                throw new Exception('error saving study')
+            }
+        } else {
+            throw new Exception('study does not validate')
+        }
+
+        //persistEntity(study)
 
         //return [validatedSuccesfully, updatedentities, failedtopersist]
         //return [0,0,0]
-    }
+        return true
+    }

@@ persistEntity() @@
     boolean persistEntity(entity) {
-        log.info ".import wizard persisting ${entity}"
+        println ".import wizard persisting ${entity}"
 
         try {
             entity.save(flush: true)
             return true

@@ createRecord() @@
     def createRecord(template, Row excelrow, mcmap) {
         def df = new DataFormatter()
+        def tft = TemplateFieldType
         def record = []                  // list of entities and the read values
         def failed = new ImportRecord()  // map with entity identifier and failed mappingcolumn
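The fuzzy-matching helpers at the end of the service are static and self-contained, so they can be exercised on their own. A minimal usage sketch follows; the spreadsheet header text and candidate property names are invented for illustration.

    // Illustrative values only; the candidate list is not taken from a real template.
    def candidates = ['name', 'species', 'birthdate']

    // case-insensitive cosine similarity over character-bigram histograms, range 0..1
    assert ImporterService.stringSimilarity('Name', 'name') == 1

    // best-matching candidate for a spreadsheet header, or null below the threshold
    assert ImporterService.mostSimilar('birth date', candidates, 0.5) == 'birthdate'
    assert ImporterService.mostSimilar('xyz', candidates, 0.5) == null

stringSimilarity lower-cases both arguments and compares character-bigram frequencies with a cosine measure, and mostSimilar keeps the highest-scoring candidate, returning null when the top score stays below the threshold.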