/**
 * Importer service
 *
 * The importer service handles the import of tabular, comma-delimited and Excel-format files.
 *
 * @package importer
 * @author t.w.abma@umcutrecht.nl
 * @since 20100126
 *
 * Revision information:
 * $Rev: 1417 $
 * $Author: work@osx.eu $
 * $Date: 2011-01-20 15:10:50 +0000 (Thu, 20 Jan 2011) $
 */

package dbnp.importer

import org.apache.poi.ss.usermodel.*
import org.apache.poi.xssf.usermodel.XSSFCell

import dbnp.studycapturing.TemplateFieldType
import dbnp.studycapturing.Template
import dbnp.studycapturing.SamplingEvent
import dbnp.studycapturing.Study
import dbnp.studycapturing.Subject
import dbnp.studycapturing.Event
import dbnp.studycapturing.Sample
import dbnp.data.Term
import org.apache.commons.lang.RandomStringUtils

class ImporterService {
	def AuthenticationService
	def sessionFactory	// Hibernate SessionFactory (used in persistEntity); injected by Grails

	boolean transactional = true

	/**
	 * @param is input stream representing the (workbook) resource
	 * @return high level representation of the workbook
	 */
	Workbook getWorkbook(InputStream is) {
		WorkbookFactory.create(is)
	}
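
	// Illustrative usage sketch (the file name and the importerService variable are hypothetical):
	// reading an uploaded workbook and previewing its first rows.
	//
	//   def wb      = importerService.getWorkbook(new FileInputStream("upload.xls"))
	//   def header  = importerService.getHeader(wb, 0, 1, 1)
	//   def preview = importerService.getDatamatrix(wb, header, 0, 1, 5)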

	/**
	 * @param wb high level representation of the workbook
	 * @param sheetindex sheet to use within the workbook
	 * @param headerrow row number containing the column headers (1-based, relative to the first row of the sheet)
	 * @param datamatrix_start row index at which the actual data matrix starts
	 * @param theEntity entity (class) to assign to every column, optional
	 * @return header representation as a MappingColumn hashmap
	 */
	def getHeader(Workbook wb, int sheetindex, int headerrow, int datamatrix_start, theEntity = null) {
		def sheet = wb.getSheetAt(sheetindex)
		def sheetrow = sheet.getRow(datamatrix_start)
		//def header = []
		def header = [:]
		def df = new DataFormatter()
		def property = new String()

		//for (Cell c: sheet.getRow(datamatrix_start)) {

		(0..sheetrow.getLastCellNum() - 1).each { columnindex ->

			//def index = c.getColumnIndex()
			def datamatrix_celltype = sheet.getRow(datamatrix_start).getCell(columnindex, Row.CREATE_NULL_AS_BLANK).getCellType()
			def datamatrix_celldata = df.formatCellValue(sheet.getRow(datamatrix_start).getCell(columnindex))
			def datamatrix_cell = sheet.getRow(datamatrix_start).getCell(columnindex)
			def headercell = sheet.getRow(headerrow - 1 + sheet.getFirstRowNum()).getCell(columnindex)
			def tft = TemplateFieldType.STRING // default templatefield type

			// Check for every cell type; currently redundant code, but possibly this will become
			// a piece of custom code for every cell type, like specific formatting

			switch (datamatrix_celltype) {
				case Cell.CELL_TYPE_STRING:
					// parse cell value as double
					def doubleBoolean = true
					def fieldtype = TemplateFieldType.STRING

					// is this string perhaps a double?
					try {
						formatValue(datamatrix_celldata, TemplateFieldType.DOUBLE)
					} catch (NumberFormatException nfe) { doubleBoolean = false }
					finally {
						if (doubleBoolean) fieldtype = TemplateFieldType.DOUBLE
					}

					header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell),
							templatefieldtype: fieldtype,
							index: columnindex,
							entity: theEntity,
							property: property);

					break
				case Cell.CELL_TYPE_NUMERIC:
					def fieldtype = TemplateFieldType.LONG
					def doubleBoolean = true
					def longBoolean = true

					// is this cell really an integer?
					try {
						Long.valueOf(datamatrix_celldata)
					} catch (NumberFormatException nfe) { longBoolean = false }
					finally {
						if (longBoolean) fieldtype = TemplateFieldType.LONG
					}

					// it's not a long, perhaps a double?
					if (!longBoolean)
						try {
							formatValue(datamatrix_celldata, TemplateFieldType.DOUBLE)
						} catch (NumberFormatException nfe) { doubleBoolean = false }
						finally {
							if (doubleBoolean) fieldtype = TemplateFieldType.DOUBLE
						}

					if (DateUtil.isCellDateFormatted(datamatrix_cell)) fieldtype = TemplateFieldType.DATE

					header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell),
							templatefieldtype: fieldtype,
							index: columnindex,
							entity: theEntity,
							property: property);
					break
				case Cell.CELL_TYPE_BLANK:
					header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell),
							templatefieldtype: TemplateFieldType.STRING,
							index: columnindex,
							entity: theEntity,
							property: property);
					break
				default:
					header[columnindex] = new dbnp.importer.MappingColumn(name: df.formatCellValue(headercell),
							templatefieldtype: TemplateFieldType.STRING,
							index: columnindex,
							entity: theEntity,
							property: property);
					break
			} // end of switch
		} // end of cell loop
		return header
	}
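
	// The returned header maps column index to a MappingColumn, e.g. (illustrative values only):
	//   [0: MappingColumn(name: 'name', templatefieldtype: STRING, index: 0, entity: null, property: ''),
	//    1: MappingColumn(name: 'age',  templatefieldtype: LONG,   index: 1, entity: null, property: '')]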

	/**
	 * This method is meant to return a matrix of the rows and columns
	 * used in the preview
	 *
	 * @param wb workbook object
	 * @param header header map (column index to MappingColumn) as returned by getHeader
	 * @param sheetindex sheet index used
	 * @param datamatrix_start row index at which the data matrix starts
	 * @param count number of rows to return (capped at the last row of the sheet)
	 * @return two dimensional array (matrix) of Cell objects
	 */
	Object[][] getDatamatrix(Workbook wb, header, int sheetindex, int datamatrix_start, int count) {
		def sheet = wb.getSheetAt(sheetindex)
		def rows = []
		def df = new DataFormatter()

		count = (count < sheet.getLastRowNum()) ? count : sheet.getLastRowNum()

		// walk through all rows
		((datamatrix_start + sheet.getFirstRowNum())..count).each { rowindex ->
			def row = []

			(0..header.size() - 1).each { columnindex ->
				def c = sheet.getRow(rowindex).getCell(columnindex, Row.CREATE_NULL_AS_BLANK)
				row.add(c)
			}

			rows.add(row)
		}

		return rows
	}
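
	// Each entry in the returned preview matrix is a raw POI Cell; for display the values
	// can be rendered with a DataFormatter, e.g. (sketch):
	//
	//   def df = new DataFormatter()
	//   datamatrix.each { row -> println row.collect { cell -> df.formatCellValue(cell) }.join('\t') }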

	/**
	 * This method will move a file to a new location.
	 *
	 * @param file File object to move
	 * @param folderpath folder to move the file to
	 * @param filename (new) filename to give
	 * @return if the file has been moved successfully, the new path and filename will be returned, otherwise an empty string will be returned
	 */
	def moveFile(File file, String folderpath, String filename) {
		try {
			def rnd = ""; //System.currentTimeMillis()
			file.transferTo(new File(folderpath, rnd + filename))
			return folderpath + filename
		} catch (Exception exception) {
			log.error "File move error, ${exception}"
			return ""
		}
	}

	/**
	 * @return random numeric value
	 */
	def random = {
		return System.currentTimeMillis() + Runtime.runtime.freeMemory()
	}

	/**
	 * Method to read data from a workbook and to import data into a two dimensional
	 * array
	 *
	 * @param template_id template identifier to use fields from
	 * @param wb POI horrible spreadsheet formatted workbook object
	 * @param mcmap linked hashmap (preserved order) of MappingColumns
	 * @param sheetindex sheet to use when using multiple sheets
	 * @param rowindex first row to start with reading the actual data (NOT the header)
	 * @return two dimensional array containing records (with entities)
	 *
	 * @see dbnp.importer.MappingColumn
	 */
	def importData(template_id, Workbook wb, int sheetindex, int rowindex, mcmap) {
		def sheet = wb.getSheetAt(sheetindex)
		def template = Template.get(template_id)
		def table = []
		def failedcells = [] // list of records

		// walk through all rows and fill the table with records
		(rowindex..sheet.getLastRowNum()).each { i ->
			// Create an entity record based on a row read from Excel and store the cells which failed to be mapped
			def (record, failed) = createRecord(template, sheet.getRow(i), mcmap)

			// Add record with entity and its values to the table
			table.add(record)

			// If failed cells have been found, add them to the failed cells list
			if (failed?.importcells?.size() > 0) failedcells.add(failed)
		}

		return [table, failedcells]
	}
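
	// Illustrative call (templateId, the workbook, the indices and the mcmap produced during
	// the column-mapping step are assumed to exist):
	//
	//   def (table, failedcells) = importerService.importData(templateId, wb, 0, 1, mcmap)
	//   // table: list of records, each a list of entity instances with their field values set
	//   // failedcells: list of ImportRecord objects holding the cells that could not be mapped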

	/** Method to put failed cells back into the datamatrix. Failed cells are cell values
	 * which could not be stored in an entity (e.g. 'Humu Supiuns' in an ontology field).
	 * Empty corrections should not be stored
	 *
	 * @param datamatrix two dimensional array containing entities and possibly also failed cells
	 * @param failedcells list with maps of failed cells in [mappingcolumn, cell] format
	 * @param correctedcells map of corrected cells in [cellhashcode, value] format
	 * */
	def saveCorrectedCells(datamatrix, failedcells, correctedcells) {

		// Loop through all failed cells (stored per record as [record hashcode, importcells])
		failedcells.each { record ->
			record.value.importcells.each { cell ->

				// Get the corrected value
				def correctedvalue = correctedcells.find { it.key.toInteger() == cell.getIdentifier() }.value

				// Find the record in the table which the mappingcolumn belongs to
				def tablerecord = datamatrix.find { it.hashCode() == record.key }

				// Loop through all entities in the record and correct them if necessary
				tablerecord.each { rec ->
					rec.each { entity ->
						try {
							// Update the entity field
							entity.setFieldValue(cell.mappingcolumn.property, correctedvalue)
							//println "Adjusted " + cell.mappingcolumn.property + " to " + correctedvalue
						}
						catch (Exception e) {
							//println "Could not map corrected ontology: " + cell.mappingcolumn.property + " to " + correctedvalue
						}
					}
				} // end of table record
			} // end of cell record
		} // end of failedlist
	}

	/**
	 * Method to store a matrix containing the entities in a record like structure. Every row in the table
	 * contains one or more entity objects (which contain fields with values). So actually a row represents
	 * a record with fields from one or more different entities.
	 *
	 * @param study entity Study
	 * @param datamatrix two dimensional array containing entities with values read from Excel file
	 */
	static saveDatamatrix(Study study, datamatrix) {
		def validatedSuccesfully = 0
		def entitystored = null

		// Study passed? Sync data
		if (study != null) study.refresh()

		// go through the data matrix, read every record and validate the entity and try to persist it
		datamatrix.each { record ->
			record.each { entity ->
				switch (entity.getClass()) {
					case Study: println "Persisting Study `" + entity + "`: "
						entity.owner = AuthenticationService.getLoggedInUser()
						persistEntity(entity)
						break
					case Subject: println "Persisting Subject `" + entity + "`: "

						// is the current entity not already in the database?
						//entitystored = isEntityStored(entity)

						// this entity is new, so add it to the study
						//if (entitystored==null)

						study.addToSubjects(entity)

						break
					case Event: println "Persisting Event `" + entity + "`: "
						study.addToEvents(entity)
						break
					case Sample: println "Persisting Sample `" + entity + "`: "

						// is this sample validatable (sample name unique for example?)
						study.addToSamples(entity)

						break
					case SamplingEvent: println "Persisting SamplingEvent `" + entity + "`: "
						study.addToSamplingEvents(entity)
						break
					default: println "Skipping persisting of `" + entity.getClass() + "`"
						break
				} // end switch
			} // end record
		} // end datamatrix

		// validate study
		if (study.validate()) {
			if (!study.save(flush: true)) {
				//this.appendErrors(flow.study, flash.wizardErrors)
				throw new Exception('error saving study')
			}
		} else {
			throw new Exception('study does not validate')
		}

		//persistEntity(study)

		//return [validatedSuccesfully, updatedentities, failedtopersist]
		//return [0,0,0]
		return true
	}

	/**
	 * Check whether an entity already exists. A unique field in the entity is
	 * used to check whether the instantiated entity (read from Excel) is new.
	 * If the entity is found in the database it will be returned as is.
	 *
	 * @param entity entity object like a Study, Subject, Sample et cetera
	 * @return entity if found, otherwise null
	 */
	def isEntityStored(entity) {
		switch (entity.getClass()) {
			case Study: return Study.findByCode(entity.code)
				break
			case Subject: return Subject.findByParentAndName(entity.parent, entity.name)
				break
			case Event: break
			case Sample:
				break
			case SamplingEvent: break
			default: // unknown entity
				return null
		}
	}

	/**
	 * Find the entity and update the fields. The entity is an instance
	 * read from Excel. This method looks in the database for the entity
	 * having the same identifier. If it has found the same entity
	 * already in the database, it will update the record.
	 *
	 * @param entitystored existing record in the database to update
	 * @param entity entity read from Excel
	 */
	def updateEntity(entitystored, entity) {
		switch (entity.getClass()) {
			case Study: break
			case Subject: entitystored.properties = entity.properties
				entitystored.save()
				break
			case Event: break
			case Sample: break
			case SamplingEvent: break
			default: // unknown entity
				return null
		}
	}

	/**
	 * Method to persist entities into the database
	 * Checks whether entity already exists (based on identifier column 'name')
	 *
	 * @param entity entity object like Study, Subject, Protocol et cetera
	 *
	 */
	boolean persistEntity(entity) {
		println ".import wizard persisting ${entity}"

		try {
			entity.save(flush: true)
			return true

		} catch (Exception e) {
			def session = sessionFactory.currentSession
			session.setFlushMode(org.hibernate.FlushMode.MANUAL)
			log.error ".import wizard, failed to save entity:\n" + org.apache.commons.lang.exception.ExceptionUtils.getRootCauseMessage(e)
		}

		return true
	}

	/**
	 * This method creates a record (array) containing entities with values
	 *
	 * @param template template to take the fields from
	 * @param excelrow POI based Excel row containing the cells
	 * @param mcmap map containing MappingColumn objects
	 * @return list containing the record (entities with values set) and an ImportRecord with the cells that failed to map
	 */
	def createRecord(template, Row excelrow, mcmap) {
		def df = new DataFormatter()
		def tft = TemplateFieldType
		def record = [] // list of entities and the read values
		def failed = new ImportRecord() // map with entity identifier and failed mappingcolumn

		// Initialize all possible entities with the chosen template
		def study = new Study(template: template)
		def subject = new Subject(template: template)
		def samplingEvent = new SamplingEvent(template: template)
		def event = new Event(template: template)
		def sample = new Sample(template: template)

		// Go through the Excel row cell by cell
		for (Cell cell : excelrow) {
			// get the MappingColumn information of the current cell
			def mc = mcmap[cell.getColumnIndex()]
			def value

			// Check if column must be imported
			if (mc != null && !mc.dontimport) {
				try {
					value = formatValue(df.formatCellValue(cell), mc.templatefieldtype)
				} catch (NumberFormatException nfe) {
					value = ""
				}

				try {
					// which entity does the current cell (field) belong to?
					switch (mc.entity) {
						case Study: // does the entity already exist in the record? If not make it so.
							(record.any { it.getClass() == mc.entity }) ? 0 : record.add(study)
							study.setFieldValue(mc.property, value)
							break
						case Subject: (record.any { it.getClass() == mc.entity }) ? 0 : record.add(subject)
							subject.setFieldValue(mc.property, value)
							break
						case SamplingEvent: (record.any { it.getClass() == mc.entity }) ? 0 : record.add(samplingEvent)
							samplingEvent.setFieldValue(mc.property, value)
							break
						case Event: (record.any { it.getClass() == mc.entity }) ? 0 : record.add(event)
							event.setFieldValue(mc.property, value)
							break
						case Sample: (record.any { it.getClass() == mc.entity }) ? 0 : record.add(sample)
							sample.setFieldValue(mc.property, value)
							break
						case Object: // don't import
							break
					} // end switch
				} catch (Exception iae) {
					log.error ".import wizard error could not set property `" + mc.property + "` to value `" + value + "`"
					// store the mapping column and value which failed
					def identifier

					switch (mc.entity) {
						case Study: identifier = study.getIdentifier()
							break
						case Subject: identifier = subject.getIdentifier()
							break
						case SamplingEvent: identifier = samplingEvent.getIdentifier()
							break
						case Event: identifier = event.getIdentifier()
							break
						case Sample: identifier = sample.getIdentifier()
							break
						case Object: // don't import
							break
					}

					def mcInstance = new MappingColumn()
					mcInstance.properties = mc.properties
					failed.addToImportcells(new ImportCell(mappingcolumn: mcInstance, value: value, entityidentifier: identifier))
				}
			} // end if
		} // end for
		// a failed column means that using the entity.setFieldValue() threw an exception
		return [record, failed]
	}
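
	// The mcmap argument maps column index to a MappingColumn describing that column,
	// e.g. (illustrative sketch, property names taken from the usage above):
	//
	//   def mcmap = [0: new MappingColumn(name: 'name', entity: Subject, property: 'name',
	//                                     templatefieldtype: TemplateFieldType.STRING, index: 0)]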

	/**
	 * Method to parse a value conforming to a specific TemplateFieldType
	 * @param value string containing the value
	 * @param type TemplateFieldType to parse the value as
	 * @return object corresponding to the TemplateFieldType
	 */
	def formatValue(String value, TemplateFieldType type) throws NumberFormatException {
		switch (type) {
			case TemplateFieldType.STRING: return value.trim()
			case TemplateFieldType.TEXT: return value.trim()
			case TemplateFieldType.LONG: return (long) Double.valueOf(value)
			//case TemplateFieldType.FLOAT: return Float.valueOf(value.replace(",", "."));
			case TemplateFieldType.DOUBLE: return Double.valueOf(value.replace(",", "."));
			case TemplateFieldType.STRINGLIST: return value.trim()
			case TemplateFieldType.ONTOLOGYTERM: return value.trim()
			case TemplateFieldType.DATE: return value
			default: return value
		}
	}
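
	// Examples of the conversions above (worked out from the switch):
	//   formatValue("12", TemplateFieldType.LONG)     -> 12   (via (long) Double.valueOf("12"))
	//   formatValue("1,5", TemplateFieldType.DOUBLE)  -> 1.5  (comma decimal separator replaced by a dot)
	//   formatValue(" ok ", TemplateFieldType.STRING) -> "ok" (trimmed)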

	// helper methods for fuzzy string matching
	// <FUZZY MATCHING>
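
	// The similarity measure below is the cosine similarity of character n-gram frequency
	// vectors (bigrams by default). For example, the bigrams of "night" are
	// ["ni", "ig", "gh", "ht"]; strings sharing many bigrams score close to 1, strings with
	// no bigrams in common score 0. mostSimilar() returns the candidate with the highest
	// score, or null when no candidate reaches the given threshold.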

	static def similarity(l_seq, r_seq, degree = 2) {
		def l_histo = countNgramFrequency(l_seq, degree)
		def r_histo = countNgramFrequency(r_seq, degree)

		dotProduct(l_histo, r_histo) /
			Math.sqrt(dotProduct(l_histo, l_histo) *
				dotProduct(r_histo, r_histo))
	}

	static def countNgramFrequency(sequence, degree) {
		def histo = [:]
		def items = sequence.size()

		for (int i = 0; i + degree <= items; i++) {
			def gram = sequence[i..<(i + degree)]
			histo[gram] = 1 + histo.get(gram, 0)
		}
		histo
	}

	static def dotProduct(l_histo, r_histo) {
		def sum = 0
		l_histo.each { key, value ->
			sum = sum + l_histo[key] * r_histo.get(key, 0)
		}
		sum
	}

	static def stringSimilarity(l_str, r_str, degree = 2) {

		similarity(l_str.toString().toLowerCase().toCharArray(),
			r_str.toString().toLowerCase().toCharArray(),
			degree)
	}

	static def mostSimilar(pattern, candidates, threshold = 0) {
		def topScore = 0
		def bestFit = null

		candidates.each { candidate ->
			def score = stringSimilarity(pattern, candidate)
			if (score > topScore) {
				topScore = score
				bestFit = candidate
			}
		}

		if (topScore < threshold)
			bestFit = null

		bestFit
	}
	// </FUZZY MATCHING>

}