1 | /** |
---|
2 | * Importer service |
---|
3 | * |
---|
4 | * The importer service handles the import of tabular, comma delimited and Excel format |
---|
5 | * based files. |
---|
6 | * |
---|
7 | * @package importer |
---|
8 | * @author t.w.abma@umcutrecht.nl |
---|
9 | * @since 20100126 |
---|
10 | * |
---|
11 | * Revision information: |
---|
12 | * $Rev: 1610 $ |
---|
13 | * $Author: robert@isdat.nl $ |
---|
14 | * $Date: 2011-03-09 20:28:13 +0000 (wo, 09 mrt 2011) $ |
---|
15 | */ |
---|
16 | package dbnp.importer |
---|
17 | |
---|
18 | import org.dbnp.gdt.* |
---|
19 | import org.apache.poi.ss.usermodel.* |
---|
20 | import dbnp.studycapturing.* |
---|
21 | |
---|
22 | class ImporterService { |
---|
23 | def authenticationService |
---|
24 | |
---|
25 | static transactional = false |
---|
26 | |
---|
/**
 * Opens a spreadsheet that is offered as a stream.
 *
 * @param is input stream representing the (workbook) resource
 * @return the workbook that WorkbookFactory creates from the stream
 */
Workbook getWorkbook(InputStream is) {
    Workbook workbook = WorkbookFactory.create(is)
    return workbook
}
---|
34 | |
---|
/**
 * Builds the column header description of a sheet.
 *
 * One MappingColumn is created for every column of the row at datamatrix_start. The name of
 * the column is read from the header row; the template field type is guessed from the cell
 * in the row at datamatrix_start.
 *
 * @param wb high level representation of the workbook
 * @param sheetindex sheet to use within the workbook
 * @param headerrow number of the header row; the row that is read is
 *                  (headerrow - 1 + the first row number of the sheet)
 * @param datamatrix_start index of the row whose cells are inspected to guess the column types
 * @param theEntity value stored as entityclass in every MappingColumn (null by default)
 * @return list of MappingColumn objects, one per column. The list is empty when the row at
 *         datamatrix_start is missing or has no cells
 */
def getHeader(Workbook wb, int sheetindex, int headerrow, int datamatrix_start, theEntity = null) {
    def sheet = wb.getSheetAt(sheetindex)
    def dataRow = sheet.getRow(datamatrix_start)
    def header = []

    // A missing or empty row has no columns to describe. getLastCellNum() is -1 for a row
    // without cells, and a Groovy range like (0..-2) counts downwards instead of being empty.
    if (dataRow == null || dataRow.getLastCellNum() < 1)
        return header

    def df = new DataFormatter()
    int columncount = dataRow.getLastCellNum()

    // The header row may be missing; in that case all column names are empty strings
    def headerRow = sheet.getRow(headerrow - 1 + sheet.getFirstRowNum())

    for (int columnindex = 0; columnindex < columncount; columnindex++) {
        def datacell = dataRow.getCell(columnindex, Row.CREATE_NULL_AS_BLANK)
        def headercell = headerRow?.getCell(columnindex)

        header << new MappingColumn(name: df.formatCellValue(headercell),
            templatefieldtype: guessTemplateFieldType(datacell, df.formatCellValue(datacell)),
            index: columnindex,
            entityclass: theEntity,
            property: "")
    }

    return header
}

/**
 * Guesses the template field type of a column from one of its data cells.
 *
 * @param cell cell to inspect
 * @param celldata formatted (textual) value of the cell
 * @return DOUBLE or STRING for text cells, LONG, DOUBLE or DATE for numeric cells and
 *         STRING for blank cells and all other cell types
 */
private guessTemplateFieldType(Cell cell, String celldata) {
    def celltype = cell.getCellType()
    boolean numeric = (celltype == Cell.CELL_TYPE_NUMERIC)

    if (!numeric && celltype != Cell.CELL_TYPE_STRING)
        return TemplateFieldType.STRING

    // Only numeric cells are checked for being a whole number
    boolean isLong = false
    if (numeric) {
        try {
            Long.valueOf(celldata)
            isLong = true
        } catch (NumberFormatException ignored) {
            // not a whole number, perhaps a double
        }
    }

    // Text cells, and numeric cells that are not a long, are checked for being a double
    boolean isDouble = false
    if (!isLong) {
        try {
            formatValue(celldata, TemplateFieldType.DOUBLE)
            isDouble = true
        } catch (NumberFormatException ignored) {
            // not a double either
        }
    }

    def fieldtype
    if (isDouble)
        fieldtype = TemplateFieldType.DOUBLE
    else
        // A numeric cell that parses as neither long nor double is still reported as LONG
        fieldtype = numeric ? TemplateFieldType.LONG : TemplateFieldType.STRING

    // A date format on a numeric cell takes precedence over the number checks
    if (numeric && DateUtil.isCellDateFormatted(cell))
        fieldtype = TemplateFieldType.DATE

    return fieldtype
}
---|
137 | |
---|
/**
 * Returns the matrix of rows and columns that is used in the preview.
 *
 * @param wb workbook object
 * @param header column header list; its size determines how many columns are read per row
 * @param sheetindex sheet index used
 * @param datamatrix_start offset of the first row to read, relative to the first row of the sheet
 * @param count index of the last row to read; it is capped at the last row of the sheet
 * @return two dimensional array (matrix) of Cell objects. A row that does not exist in the
 *         sheet is returned as an empty row
 */
Object[][] getDatamatrix(Workbook wb, header, int sheetindex, int datamatrix_start, int count) {
    def sheet = wb.getSheetAt(sheetindex)
    def rows = []

    int firstrow = datamatrix_start + sheet.getFirstRowNum()
    int lastrow = Math.min(count, sheet.getLastRowNum())
    int columncount = header ? header.size() : 0

    // Counting loops are used instead of ranges: a Groovy range whose start is larger than
    // its end counts downwards, which would read rows or columns that were not asked for.
    for (int rowindex = firstrow; rowindex <= lastrow; rowindex++) {
        def sheetrow = sheet.getRow(rowindex)
        def row = []

        if (sheetrow != null) {
            for (int columnindex = 0; columnindex < columncount; columnindex++)
                row.add(sheetrow.getCell(columnindex, Row.CREATE_NULL_AS_BLANK))
        }

        rows.add(row)
    }

    return rows
}
---|
168 | |
---|
/**
 * This method will move a file to a new location.
 *
 * @param file File object to move
 * @param folderpath folder to move the file to
 * @param filename (new) filename to give
 * @return if the file has been moved successfully, the path of the moved file is returned,
 *         otherwise an empty string is returned
 */
def moveFile(File file, String folderpath, String filename) {
    try {
        def target = new File(folderpath, filename)

        // java.io.File has no transferTo() method, so the move is done with renameTo().
        // renameTo() reports a failure through its return value instead of an exception.
        if (!file.renameTo(target)) {
            log.error "File move error, could not move ${file} to ${target}"
            return ""
        }

        // The path of the target is returned instead of folderpath + filename, because the
        // concatenation misses the separator when folderpath does not end with one.
        return target.path
    } catch (Exception exception) {
        log.error "File move error, ${exception}"
        return ""
    }
}
---|
187 | |
---|
/**
 * @return numeric value: the current time in milliseconds plus the amount of free memory
 */
def random = {
    long millis = System.currentTimeMillis()
    long freebytes = Runtime.getRuntime().freeMemory()
    return millis + freebytes
}
---|
194 | |
---|
195 | |
---|
/**
 * Builds one record for every sample of a study. A record holds the entities that
 * retrieveEntitiesBySample returns for the sample, the template of each of those entities
 * and a textual summary of the templates that are set.
 *
 * @param s Study to retrieve records from
 * @return A list with hashmaps [ 'objects': [ 'Sample': .., 'Subject': .., 'SamplingEvent': .., 'Event': '.. ], 'templates': [], 'templateCombination': .. ]
 */
protected def getRecords( Study s ) {
    def records = []

    for( sample in ( s.samples ?: [] ) ) {
        def objects = retrieveEntitiesBySample( sample )
        def templates = [:]
        def combination = []

        for( entry in objects ) {
            def template = entry.value?.template
            templates[ entry.key ] = template

            // Only entities that have a template show up in the summary
            if( template )
                combination << ( entry.key + ": " + template?.name )
        }

        records << [
            'objects': objects,
            'templates': templates,
            'templateCombination': combination.join( ', ' )
        ]
    }

    return records
}
---|
223 | |
---|
/**
 * Returns the sample itself together with the subject, sampling event and event that
 * belong to it.
 *
 * @param s Sample to find the information for (may be null)
 * @return Map with the keys 'Sample', 'Subject', 'SamplingEvent' and 'Event'. The event is the
 *         first event of the event group of the sample. Entries that can't be found are null
 */
protected retrieveEntitiesBySample( Sample s ) {
    def entities = [:]

    entities[ 'Sample' ] = s
    entities[ 'Subject' ] = s?.parentSubject
    entities[ 'SamplingEvent' ] = s?.parentEvent

    def groupEvents = s?.parentEventGroup?.events?.toList()
    entities[ 'Event' ] = groupEvents?.getAt(0)

    return entities
}
---|
237 | |
---|
/**
 * Imports data from a workbook into a table of records. If some entities are already in the
 * database, these entities are updated.
 *
 * This method is capable of importing Subject, Samples, SamplingEvents and Events
 *
 * @param templates Map of templates, identified by their entity as a key. For example: [ Subject: Template x, Sample: Template y ]
 * @param wb Excel workbook to import
 * @param sheetindex Number of the sheet to import data from
 * @param rowindex Row to start importing from.
 * @param mcmap List of mappingcolumns, with the first entry containing information about the first column, etc.
 * @param parent Study to import all data into. Is used for determining which sample/event/subject/assay to update
 * @param createAllEntities If set to true, the entities are taken from the templates that are given. Otherwise
 *                          only the entities that are mentioned in the imported mapping columns are used
 * @return Map with two entries, or null if no mapping columns are given:
 *         "table"        List with records, one for each row in the excelsheet
 *         "failedCells"  List with the failed-cell records of the rows that contain cells that could not be
 *                        correctly imported
 */
def importOrUpdateDataBySampleIdentifier( def templates, Workbook wb, int sheetindex, int rowindex, def mcmap, Study parent = null, boolean createAllEntities = true ) {
    if( !mcmap )
        return null

    // Determine the entities the rows should be imported in
    def entities = createAllEntities ?
        templates.entrySet().value.findAll { it }.entity :
        mcmap.findAll{ !it.dontimport }.entityclass.unique()

    def sheet = wb.getSheetAt(sheetindex)

    // Look up the existing entities for all rows before any new object is created. Searching
    // after new objects have been created (but not yet saved) results in
    //    org.hibernate.AssertionFailure: collection [...] was not processed by flush()
    // errors
    def existingEntities = [:]
    int row = rowindex
    while( row <= sheet.getLastRowNum() ) {
        existingEntities[row] = findExistingEntities( entities, sheet.getRow(row), mcmap, parent )
        row++
    }

    def table = []
    def failedcells = []    // records of cells that have failed to import

    for( row = rowindex; row <= sheet.getLastRowNum(); row++ ) {
        // Create the entities for this row and collect the cells which failed to be mapped
        def imported = importOrUpdateRecord( templates, entities, sheet.getRow(row), mcmap, parent, table, existingEntities[row] )
        def record = imported[0]
        def failed = imported[1]

        // Setup the relationships between the imported entities
        relateEntities( record )

        table << record

        // Only keep track of rows that actually have failed cells
        if (failed?.importcells?.size() > 0)
            failedcells << failed
    }

    return [ "table": table, "failedCells": failedcells ]
}
---|
299 | |
---|
/**
 * Finds the entities that already exist for the given row. The sample is looked up by means
 * of findEntityByRow; the other entities are the ones that belong to that sample.
 *
 * @param entities Entities that have to be imported for this row (not used by this method)
 * @param excelRow Excel row to find the entities for
 * @param mcmap List of mappingcolumns, with the first entry containing information about the first column, etc.
 * @param parent Study to limit the search for the sample to
 * @return Map Map with entities that have been found for this row. The key for the entities is the entity name (e.g.: [Sample: null, Subject: <subject object>]
 */
def findExistingEntities(def entities, Row excelRow, mcmap, parent ) {
    // The sample is the starting point: it is found based on its identifier in the row
    def sample = findEntityByRow( dbnp.studycapturing.Sample, excelRow, mcmap, parent, [], new DataFormatter() )

    return retrieveEntitiesBySample( sample )
}
---|
316 | |
---|
/**
 * Imports one row from the excelsheet into a list of entities. Entities that are given in
 * existingEntities are updated, other entities are newly created.
 *
 * This method is capable of importing Subject, Samples, SamplingEvents and Events
 *
 * @param templates Map of templates, identified by their entity as a key. For example: [ Sample: Template y ]
 * @param entities Entities that have to be imported for this row
 * @param excelRow Excel row to import into this record
 * @param mcmap List of mappingcolumns, with the first entry containing information about the first column, etc.
 * @param parent Study to import all data into (currently not used by this method)
 * @param importedRows Rows that have been imported before this row (currently not used by this method)
 * @param existingEntities Map with the entities that already exist for this row, with the entity name as key
 * @return List List with two entries:
 *         0    List with the entities that are created or updated for this row
 *         1    ImportRecord with an ImportCell for every cell that could not be correctly imported
 *              (because the value in the excelsheet can't be entered into the template field)
 */
def importOrUpdateRecord(def templates, def entities, Row excelRow, mcmap, Study parent = null, List importedRows, Map existingEntities ) {
    DataFormatter df = new DataFormatter()
    def record = []                     // list of entities with the values that have been read
    def failed = new ImportRecord()     // holds the cells that could not be imported

    // Note: the rows that have been imported before are deliberately not searched here. The
    // search result was never used, but it was computed over all earlier rows for every row.

    for( entity in entities ) {
        def entityName = entity.name[ entity.name.lastIndexOf( '.' ) + 1..-1]
        def template = templates[ entityName ]

        // If no template is specified for this entity, continue with the next
        if( !template )
            continue

        // Update the existing entity if there is one, otherwise create a new object
        def entityObject = existingEntities ? existingEntities[ entityName ] : null
        if( !entityObject )
            entityObject = entity.newInstance()

        // Update the template
        entityObject.template = template

        // Go through the Excel row cell by cell
        for (Cell cell: excelRow) {
            // get the MappingColumn information of the current cell
            def mc = mcmap[cell.getColumnIndex()]

            // Only columns that are mapped to this entity have to be imported
            if (mc == null || mc.dontimport || mc.entityclass != entity)
                continue

            def value
            try {
                value = formatValue(df.formatCellValue(cell), mc.templatefieldtype)
            } catch (NumberFormatException nfe) {
                // a value that is not a number is imported as an empty value
                value = ""
            }

            try {
                entityObject.setFieldValue(mc.property, value)
            } catch (Exception iae) {
                log.error ".import wizard error could not set property `" + mc.property + "` to value `" + value + "`"

                // store the mapping column and value which failed
                def identifier = entityName.toLowerCase() + "_" + entityObject.getIdentifier() + "_" + mc.property

                def mcInstance = new MappingColumn()
                mcInstance.properties = mc.properties
                failed.addToImportcells(new ImportCell(mappingcolumn: mcInstance, value: value, entityidentifier: identifier))
            }
        }

        record << entityObject
    }

    // a failed cell means that entityObject.setFieldValue() threw an exception
    return [record, failed]
}
---|
412 | |
---|
/**
 * Looks for an object of the given entity that should be updated, given the excel row.
 * This is done by looking at the preferred identifier field of the entity. If a column is
 * mapped to that field, the value in the row is used to search for an existing object.
 *
 * @param entity Entity to search
 * @param excelRow Excelrow to search for. If no row is given, null is returned
 * @param mcmap List with MappingColumns
 * @param parent Parent study for the entity (if applicable). If the entity belongs to a parent, only
 *               objects with this parent are returned. Without a (saved) parent, nothing is returned for
 *               such an entity, which prevents the user from changing data in another study
 * @param importedEntities List of entities that have been imported before. The function will first look
 *                         through this list to find a matching entity.
 * @param df Formatter used to read the identifier cell. A new one is created if none is given
 * @return An entity that has the same identifier as entered in the excelRow. The entity is first sought in
 *         importedEntities. If it is not found there, the database is queried. If no entity is found at
 *         all, null is returned.
 */
def findEntityByRow( Class entity, Row excelRow, def mcmap, Study parent = null, List importedEntities = [], DataFormatter df = null ) {
    // Without a row there is no identifier to search for
    if( excelRow == null )
        return null

    if( df == null )
        df = new DataFormatter()

    def identifierField = givePreferredIdentifier( entity )
    if( !identifierField )
        return null

    // Check whether the identifierField is chosen in the column matching
    def identifierColumn = mcmap.find { it.entityclass == entity && it.property == identifierField.name }
    if( !identifierColumn )
        return null

    // Read the identifier from the row
    def identifierCell = excelRow.getCell( identifierColumn.index )
    def identifier
    try {
        identifier = formatValue(df.formatCellValue(identifierCell), identifierColumn.templatefieldtype)
    } catch (NumberFormatException nfe) {
        identifier = null
    }

    if( !identifier )
        return null

    // First search the already imported entities
    if( importedEntities ) {
        def imported = importedEntities.find { it.getFieldValue( identifierField.name ) == identifier }
        if( imported )
            return imported
    }

    // If the entity has a field 'parent', the search should be limited to
    // objects with the same parent. The method entity.hasProperty( "parent" ) doesn't
    // work, since the java.lang.Class entity doesn't know of the parent property.
    if( entity.belongsTo?.containsKey( "parent" ) ) {
        // If the entity requires a parent, but none is given, no
        // results are given from the database.
        if( !parent || !parent.id )
            return null

        log.debug "Searching (with parent ) for " + entity.name + " with " + identifierField.name + " = " + identifier
        return entity.createCriteria().get {
            eq( identifierField.name, identifier )
            eq( "parent", parent )
        }
    }

    log.debug "Searching (without parent ) for " + entity.name + " with " + identifierField.name + " = " + identifier
    return entity.createCriteria().get {
        eq( identifierField.name, identifier )
    }
}
---|
485 | |
---|
/**
 * Looks into the list of already imported entities to find an object of the given entity that
 * should be updated, given the excel row. This is done by looking at the field of the entity that
 * is marked as preferredIdentifier. If a column is mapped to that field, and the list of imported
 * entities contains an object with the same identifier, that object is returned.
 *
 * Identifiers that are strings are compared case insensitively.
 *
 * @param entity Entity to search
 * @param excelRow Excelrow to search for
 * @param mcmap List with MappingColumns
 * @param importedEntities List of entities that have been imported before
 * @param df Formatter used to read the identifier cell. A new one is created if none is given
 * @return An entity from importedEntities that has the same identifier as entered in the excelRow.
 *         If no entity is found, null is returned. The database is not queried.
 */
def findEntityInImportedEntities( Class entity, Row excelRow, def mcmap, List importedEntities = [], DataFormatter df = null ) {
    def formatter = ( df == null ) ? new DataFormatter() : df

    // The entity needs a field that identifies it
    def identifierField = entity.giveDomainFields().find { it.preferredIdentifier }
    if( !identifierField )
        return null

    // That field has to be chosen in the column matching
    def identifierColumn = mcmap.find { it.entityclass == entity && it.property == identifierField.name }
    if( !identifierColumn )
        return null

    // Read the identifier from the row
    def identifier
    try {
        def identifierCell = excelRow.getCell( identifierColumn.index )
        identifier = formatValue(formatter.formatCellValue(identifierCell), identifierColumn.templatefieldtype)
    } catch (NumberFormatException nfe) {
        identifier = null
    }

    // Nothing to search for, or nothing to search in
    if( !identifier || !importedEntities )
        return null

    def match = importedEntities.find { candidate ->
        def fieldValue = candidate.getFieldValue( identifierField.name )

        ( fieldValue instanceof String ) ?
            fieldValue.toLowerCase() == identifier.toLowerCase() :
            fieldValue == identifier
    }

    return match ?: null
}
---|
543 | |
---|
544 | |
---|
/**
 * Creates the relations between the entities that have been imported from one row in the
 * excel sheet. Of every type of entity, the first one in the list is used.
 *
 * @param entities List of entities that are created from one row
 */
def relateEntities( List entities) {
    // Picks the first entity of the given type from the list (null if there is none)
    def firstOfType = { Class type -> entities.find { type.isInstance( it ) } }

    def theStudy = firstOfType( Study )
    def theSubject = firstOfType( Subject )
    def theSample = firstOfType( Sample )
    def theEvent = firstOfType( Event )
    def theSamplingEvent = firstOfType( SamplingEvent )
    def theAssay = firstOfType( Assay )

    // If a study is found in the list, it becomes the parent of all other entities
    if( theStudy ) {
        if( theSubject ) {
            theSubject.parent = theStudy
            theStudy.addToSubjects( theSubject )
        }
        if( theSample ) {
            theSample.parent = theStudy
            theStudy.addToSamples( theSample )
        }
        if( theEvent ) {
            theEvent.parent = theStudy
            theStudy.addToEvents( theEvent )
        }
        if( theSamplingEvent ) {
            theSamplingEvent.parent = theStudy
            theStudy.addToSamplingEvents( theSamplingEvent )
        }
        if( theAssay ) {
            theAssay.parent = theStudy
            theStudy.addToAssays( theAssay )
        }
    }

    // The sample is linked to the other entities in the row
    if( theSample ) {
        if( theSubject )
            theSample.parentSubject = theSubject
        if( theSamplingEvent )
            theSample.parentEvent = theSamplingEvent

        // An event is linked to the sample by means of a new event group
        if( theEvent ) {
            def eventGroup = new EventGroup()
            eventGroup.addToEvents( theEvent )
            if( theSubject )
                eventGroup.addToSubjects( theSubject )
            if( theSamplingEvent )
                eventGroup.addToSamplingEvents( theSamplingEvent )

            theSample.parentEventGroup = eventGroup
        }

        if( theAssay ) theAssay.addToSamples( theSample )
    }
}
---|
596 | |
---|
/**
 * Method to read data from a workbook and to import the data into a table of records.
 *
 * @param template_id identifier of the template to use fields from
 * @param wb workbook object to read from
 * @param sheetindex sheet to use when using multiple sheets
 * @param rowindex first row to start with reading the actual data (NOT the header)
 * @param mcmap list of MappingColumns
 * @return list with two entries:
 *         0    list of records, one for each row that has been read
 *         1    list with the failed-cell records of the rows that contain cells which failed to be mapped
 *
 * @see dbnp.importer.MappingColumn
 */
def importData(template_id, Workbook wb, int sheetindex, int rowindex, mcmap) {
    def sheet = wb.getSheetAt(sheetindex)
    def template = Template.get(template_id)
    def table = []
    def failedcells = []    // list of records with failed cells
    int lastrow = sheet.getLastRowNum()

    // A counting loop is used instead of the range (rowindex..lastrow): a Groovy range counts
    // downwards when rowindex is larger than lastrow, which would import the rows before the
    // data (such as the header) for a sheet without data rows.
    for (int i = rowindex; i <= lastrow; i++) {
        // Create an entity record based on a row read from Excel and store the cells which failed to be mapped
        def (record, failed) = createRecord(template, sheet.getRow(i), mcmap)

        // Add record with entity and its values to the table
        table.add(record)

        // If failed cells have been found, add them to the failed cells list
        if (failed?.importcells?.size() > 0) failedcells.add(failed)
    }

    return [table, failedcells]
}
---|
629 | |
---|
/**
 * Removes cells from the failed cells list, based on the entity and field. Cells are matched on
 * their entityidentifier, which is compared to "entity_<identifier of the entity>_<lowercase field name>".
 * If the entity and field didn't fail before, nothing is removed.
 *
 * @param failedcells list of records that hold the cells that have failed previously
 * @param entity entity to remove from the failedcells list. If no entity is given, the list is returned unchanged
 * @param field field to remove the failed cell for. If no field is given, all cells for this entity will be removed
 * @return List The same list of records, of which the failed cells have been updated
 */
def removeFailedCell(failedcells, entity, field = null ) {
    if( !entity )
        return failedcells

    def entityPrefix = "entity_" + entity.getIdentifier() + "_"

    // Decides which cells stay in the list: with a field only the cell of that field is
    // dropped, without a field all cells of the entity are dropped
    def keepCell
    if( field ) {
        def failedId = entityPrefix + field.name.toLowerCase()
        keepCell = { cell -> cell.entityidentifier != failedId }
    } else {
        keepCell = { cell -> !cell.entityidentifier.startsWith( entityPrefix ) }
    }

    for( record in failedcells ) {
        record.importcells = record.importcells.findAll( keepCell )
    }

    return failedcells
}
---|
658 | |
---|
/**
 * Returns the name of an input field as it is used for a specific entity in HTML. The name is
 * built as <lowercase class name of the entity>_<identifier of the entity>_<lowercase field name>.
 *
 * @param entity entity to retrieve the field name for
 * @param field field to retrieve the field name for. For a TemplateField its escaped name is used
 * @return String Name of the HTML field for the given entity and field. Can also be used in the map
 *                of request parameters
 */
def getFieldNameInTableEditor(entity, field) {
    // The class name without the package
    def className = entity?.class.name
    def entityName = className[ className.lastIndexOf(".") + 1..-1 ]

    def fieldName = ( field instanceof TemplateField ) ? field.escapedName() : field

    return entityName.toLowerCase() + "_" + entity.getIdentifier() + "_" + fieldName.toLowerCase()
}
---|
675 | |
---|
/**
 * Retrieves a mapping column from a list based on the given field name.
 *
 * The field name is lower-cased once and compared against the <code>property</code>
 * of every mapping column; the first match wins.
 *
 * @param mappingColumns List of mapping columns (may be null)
 * @param fieldName      Field name to find (may be null)
 * @return Mapping column if a column is found, null otherwise (also when no
 *         list or no field name is given)
 */
def findMappingColumn( mappingColumns, String fieldName ) {
    // Without a name there is nothing to look for
    if (fieldName == null) {
        return null
    }

    // Lower-case once instead of once per inspected column
    def wantedProperty = fieldName.toLowerCase()

    return mappingColumns?.find { it.property == wantedProperty }
}
685 | |
---|
/**
 * Method to put failed cells back into the datamatrix. Failed cells are cell values
 * which could not be stored in an entity (e.g. Humu Supiuns in an ontology field).
 * Cells without a correction, or with an empty (blank) correction, are skipped.
 *
 * For every failed cell the corrected value is looked up by the cell identifier and
 * then offered to every entity of the matching table record through setFieldValue();
 * entities that reject the value are skipped (the failure is logged on debug level).
 *
 * @param datamatrix two dimensional array containing entities and possibly also failed cells
 * @param failedcells map of failed records, keyed by the hash code of the table record,
 *                    each value exposing its failed cells as <code>importcells</code>
 * @param correctedcells map of corrected cells in [cellhashcode, value] format; keys which
 *                       are not integers are ignored
 */
def saveCorrectedCells(datamatrix, failedcells, correctedcells) {

    // Loop through all failed records
    failedcells.each { record ->

        // The table record only depends on the failed record, so look it up once
        def tablerecord = datamatrix.find { it.hashCode() == record.key }

        record.value.importcells.each { cell ->

            // Get the corrected value; keys that are not numeric can never match a cell
            def correction = correctedcells.find { entry ->
                def key = entry.key?.toString()
                key?.isInteger() && key.toInteger() == cell.getIdentifier()
            }
            def correctedvalue = correction?.value

            // Nothing (useful) was entered for this cell: leave the entities alone
            if (correctedvalue == null || correctedvalue.toString().trim() == "") {
                return
            }

            // Loop through all entities in the record and correct them if necessary.
            // The inner each keeps supporting both nested lists and plain entities.
            tablerecord.each { rec ->
                rec.each { entity ->
                    try {
                        // Update the entity field
                        entity.setFieldValue(cell.mappingcolumn.property, correctedvalue)
                    }
                    catch (Exception e) {
                        // Expected for entities that do not own this field or reject the value
                        log.debug ".importer wizard, could not set corrected value for `" + cell.mappingcolumn.property + "`: " + e.getMessage()
                    }
                }
            } // end of table record
        } // end of cell record
    } // end of failedlist
}
722 | |
---|
/**
 * Method to store a matrix containing the entities in a record like structure. Every row in the table
 * contains one or more entity objects (which contain fields with values). So actually a row represents
 * a record with fields from one or more different entities.
 *
 * Study entities get the logged in user as owner and are validated and saved directly.
 * Subject, Event, Sample and SamplingEvent entities are added to the given study. Entities
 * of any other type are logged and skipped. Unless the importer entity type is 'Study',
 * the given study is validated and saved (flushed) at the end.
 *
 * @param study entity Study the imported entities are added to
 * @param importerEntityType type of entity being imported; 'Study' disables the refresh and
 *                           the final validate/save of the given study
 * @param datamatrix two dimensional array containing entities with values read from Excel file
 * @param authenticationService service used to retrieve the logged in user
 * @param log logger to write progress and errors to
 * @return true when everything was stored
 * @throws Exception when a study does not validate or cannot be saved
 */
static saveDatamatrix(Study study, importerEntityType, datamatrix, authenticationService, log) {
    // Study passed? Sync data
    if (study != null && importerEntityType != 'Study') study.refresh()

    // go through the data matrix, read every record and validate the entity and try to persist it
    datamatrix.each { record ->
        record.each { entity ->
            switch (entity.getClass()) {
                case Study:
                    log.info ".importer wizard, persisting Study `" + entity + "`: "
                    entity.owner = authenticationService.getLoggedInUser()

                    if (!entity.validate()) {
                        log.error ".importer wizard, study could not be validated: " + entity
                        throw new Exception('.importer wizard, study could not be validated: ' + entity)
                    }
                    if (!entity.save(flush: true)) {
                        log.error ".importer wizard, study could not be saved: " + entity
                        throw new Exception('.importer wizard, study could not be saved: ' + entity)
                    }
                    break
                case Subject:
                    log.info ".importer wizard, persisting Subject `" + entity + "`: "
                    study.addToSubjects(entity)
                    break
                case Event:
                    log.info ".importer wizard, persisting Event `" + entity + "`: "
                    study.addToEvents(entity)
                    break
                case Sample:
                    log.info ".importer wizard, persisting Sample `" + entity + "`: "
                    study.addToSamples(entity)
                    break
                case SamplingEvent:
                    log.info ".importer wizard, persisting SamplingEvent `" + entity + "`: "
                    study.addToSamplingEvents(entity)
                    break
                default:
                    // getClass() (was misspelled getclass(), which made this branch throw)
                    log.info ".importer wizard, skipping persisting of `" + entity.getClass() + "`"
                    break
            } // end switch
        } // end record
    } // end datamatrix

    // validate and store the study the entities were added to
    if (importerEntityType != 'Study') {
        if (!study.validate()) {
            throw new Exception('.importer wizard [saveDatamatrix] study does not validate')
        }
        if (!study.save(flush: true)) {
            throw new Exception('.importer wizard [saveDatamatrix] error while saving study')
        }
    }

    return true
}
803 | |
---|
/**
 * Looks up whether an entity read from Excel already exists in the database.
 *
 * A Study is searched for by its code, a Subject by its parent and name. For all
 * other entity types (Event, Sample, SamplingEvent, unknown) no lookup is done.
 *
 * @param entity entity object like a Study, Subject, Sample et cetera
 * @return the stored entity if one is found, otherwise null
 */
def isEntityStored(entity) {
    if (entity instanceof Study) {
        return Study.findByCode(entity.code)
    }

    if (entity instanceof Subject) {
        return Subject.findByParentAndName(entity.parent, entity.name)
    }

    // Event, Sample, SamplingEvent and unknown entities: no lookup available
    return null
}
826 | |
---|
/**
 * Copies the values of an entity read from Excel onto the record that already
 * exists in the database and saves that record.
 *
 * Only Subject entities are updated; Study, Event, Sample and SamplingEvent
 * entities are left untouched. For unknown entity types null is returned.
 *
 * @param entitystored existing record in the database to update
 * @param entity entity read from Excel
 */
def updateEntity(entitystored, entity) {
    switch (entity.getClass()) {
        case Study:
            // studies are not updated
            break
        case Subject:
            // overwrite the stored subject with the values read from Excel
            entitystored.properties = entity.properties
            entitystored.save()
            break
        case Event:
        case Sample:
        case SamplingEvent:
            // not updated
            break
        default:
            // unknown entity
            return null
    }
}
849 | |
---|
/**
 * Placeholder for persisting a single entity into the database.
 *
 * The implementation is disabled (kept below as a comment for reference), so the
 * method currently does nothing with the entity and yields false.
 *
 * @param entity entity object like Study, Subject, Protocol et cetera
 * @return always false
 */
boolean persistEntity(entity) {
    // Disabled implementation:
    //
    // log.info ".import wizard persisting ${entity}"
    // try {
    //     entity.save(flush: true)
    //     return true
    // } catch (Exception e) {
    //     def session = sessionFactory.currentSession
    //     session.setFlushMode(org.hibernate.FlushMode.MANUAL)
    //     log.error ".import wizard, failed to save entity:\n" + org.apache.commons.lang.exception.ExceptionUtils.getRootCauseMessage(e)
    // }
    // return true

    return false
}
870 | |
---|
/**
 * Turns one Excel row into a record: a list of entities filled with the cell values.
 *
 * For every cell whose mapping column must be imported, the formatted value is set on
 * the entity the column belongs to (Study, Subject, SamplingEvent, Event or Sample).
 * An entity is added to the record the first time one of its columns is encountered.
 * Values which the entity refuses (setFieldValue() throws) are collected as failed cells.
 *
 * @param template template assigned to every entity created for this row
 * @param excelrow POI based Excel row containing the cells
 * @param mcmap map containing MappingColumn objects, addressed by column index
 * @return two element list: the list of entities and the ImportRecord with the failed cells
 */
def createRecord(template, Row excelrow, mcmap) {
    def formatter = new DataFormatter()
    def record = [] // list of entities and the read values
    def failed = new ImportRecord() // failed cells of this row

    // Initialize all possible entities with the chosen template
    def study = new Study(template: template)
    def subject = new Subject(template: template)
    def samplingEvent = new SamplingEvent(template: template)
    def event = new Event(template: template)
    def sample = new Sample(template: template)

    // Resolves the entity instance a mapping column belongs to; null means "don't import"
    def entityFor = { entityclass ->
        switch (entityclass) {
            case Study: return study
            case Subject: return subject
            case SamplingEvent: return samplingEvent
            case Event: return event
            case Sample: return sample
            default: return null
        }
    }

    // Go through the Excel row cell by cell
    for (Cell cell : excelrow) {
        // get the MappingColumn information of the current cell
        def mc = mcmap[cell.getColumnIndex()]

        // Skip columns which are unmapped or explicitly excluded
        if (mc == null || mc.dontimport) {
            continue
        }

        // Values which cannot be parsed as a number are imported as empty string
        def value
        try {
            value = formatValue(formatter.formatCellValue(cell), mc.templatefieldtype)
        } catch (NumberFormatException nfe) {
            value = ""
        }

        try {
            def target = entityFor(mc.entityclass)
            if (target != null) {
                // does the entity already exist in the record? If not make it so.
                if (!record.any { it.getClass() == mc.entityclass }) {
                    record.add(target)
                }
                target.setFieldValue(mc.property, value)
            }
        } catch (Exception iae) {
            log.error ".import wizard error could not set property `" + mc.property + "` to value `" + value + "`"

            // store the mapping column and value which failed
            def fieldName = mc.property?.toLowerCase()
            def owner = entityFor(mc.entityclass)
            def identifier = null
            if (owner != null) {
                identifier = "entity_" + owner.getIdentifier() + "_" + fieldName
            }

            def mcInstance = new MappingColumn()
            mcInstance.properties = mc.properties
            failed.addToImportcells(new ImportCell(mappingcolumn: mcInstance, value: value, entityidentifier: identifier))
        }
    } // end for

    // a failed column means that using the entity.setFieldValue() threw an exception
    return [record, failed]
}
958 | |
---|
/**
 * Method to parse a value conform a specific type.
 *
 * STRING, TEXT, STRINGLIST and ONTOLOGYTERM values are trimmed. LONG values are
 * parsed exactly when they are plain integers; other numeric notations (e.g. "12.0"
 * or "1e3") are parsed as double and truncated. DOUBLE values accept a comma as
 * decimal separator. DATE and all other types are returned unchanged.
 *
 * @param value string containing the value
 * @param type template field type the value has to conform to
 * @return object corresponding to the TemplateFieldType
 * @throws NumberFormatException when a LONG or DOUBLE value is not numeric
 */
def formatValue(String value, TemplateFieldType type) throws NumberFormatException {
    switch (type) {
        case TemplateFieldType.STRING:
        case TemplateFieldType.TEXT:
        case TemplateFieldType.STRINGLIST:
        case TemplateFieldType.ONTOLOGYTERM:
            return value.trim()
        case TemplateFieldType.LONG:
            try {
                // Exact parse: going through double loses precision above 2^53
                return Long.parseLong(value.trim())
            } catch (NumberFormatException notAPlainInteger) {
                // Decimal or scientific notation: parse as double and truncate
                return (long) Double.valueOf(value)
            }
        case TemplateFieldType.DOUBLE:
            return Double.valueOf(value.replace(",", "."))
        case TemplateFieldType.DATE:
            return value
        default:
            return value
    }
}
977 | |
---|
/**
 * Looks up the preferred identifier among the domain fields of an entity class.
 *
 * @param entity TemplateEntity class, queried through its giveDomainFields() method
 * @return The first domain field flagged as preferred identifier, or null if there is none
 */
public TemplateField givePreferredIdentifier( Class entity ) {
    return entity.giveDomainFields().find { domainField -> domainField.preferredIdentifier }
}
988 | |
---|
989 | // static helper methods for fuzzy string matching |
---|
990 | // <FUZZY MATCHING> |
---|
991 | |
---|
/**
 * Computes the cosine similarity between the n-gram frequency histograms of two sequences.
 *
 * @param l_seq  first sequence (anything countNgramFrequency accepts)
 * @param r_seq  second sequence
 * @param degree length of the n-grams, 2 by default
 * @return similarity score; 0.0 when one of the sequences yields no n-grams
 *         (e.g. it is shorter than degree), instead of NaN
 */
static def similarity(l_seq, r_seq, degree = 2) {
    def l_histo = countNgramFrequency(l_seq, degree)
    def r_histo = countNgramFrequency(r_seq, degree)

    // Product of both vector lengths; zero when either histogram is empty
    def norm = Math.sqrt(dotProduct(l_histo, l_histo) *
            dotProduct(r_histo, r_histo))

    // Avoid 0 / 0.0, which would give NaN
    if (norm == 0) {
        return 0.0d
    }

    return dotProduct(l_histo, r_histo) / norm
}
1000 | |
---|
/**
 * Counts how often every n-gram (run of <code>degree</code> consecutive items)
 * occurs in a sequence.
 *
 * @param sequence sequence supporting size() and range subscripts (e.g. String, list, char array)
 * @param degree   length of the n-grams
 * @return map from n-gram to its number of occurrences; empty when the sequence is
 *         shorter than degree
 */
static def countNgramFrequency(sequence, degree) {
    def frequencies = [:]
    def length = sequence.size()

    // Slide a window of `degree` items over the sequence
    int start = 0
    while (start + degree <= length) {
        def ngram = sequence[start..<(start + degree)]
        frequencies[ngram] = 1 + (frequencies.get(ngram) ?: 0)
        start++
    }

    return frequencies
}
1011 | |
---|
/**
 * Computes the dot product of two frequency histograms: the sum over all keys of
 * the left histogram of (left count * right count). Keys missing from the right
 * histogram count as 0. Neither histogram is modified.
 *
 * @param l_histo map from key to count
 * @param r_histo map from key to count
 * @return the dot product, 0 when the histograms share no keys
 */
static def dotProduct(l_histo, r_histo) {
    def sum = 0
    l_histo.each { key, value ->
        // Plain Map.get: Groovy's get(key, default) would insert the default into r_histo
        def otherCount = r_histo.get(key)
        if (otherCount != null) {
            sum = sum + value * otherCount
        }
    }
    sum
}
1019 | |
---|
/**
 * Case-insensitive n-gram similarity of the string representations of two values.
 *
 * Both values are converted with toString(), lower-cased and compared character
 * by character through similarity().
 *
 * @param l_str  first value
 * @param r_str  second value
 * @param degree length of the n-grams, 2 by default
 * @return similarity score as computed by similarity()
 */
static def stringSimilarity(l_str, r_str, degree = 2) {
    def leftChars = l_str.toString().toLowerCase().toCharArray()
    def rightChars = r_str.toString().toLowerCase().toCharArray()

    return similarity(leftChars, rightChars, degree)
}
1026 | |
---|
/**
 * Picks the candidate which resembles a pattern most, according to stringSimilarity().
 *
 * @param pattern    value to compare the candidates with
 * @param candidates values to choose from
 * @param threshold  minimum score the best candidate must reach, 0 by default
 * @return the candidate with the highest score above 0 (the first one on ties), or
 *         null when no candidate scores above 0 or the best score is below the threshold
 */
static def mostSimilar(pattern, candidates, threshold = 0) {
    def bestScore = 0
    def winner = null

    for (candidate in candidates) {
        def score = stringSimilarity(pattern, candidate)
        // strictly greater: the earliest candidate wins a tie
        if (score > bestScore) {
            bestScore = score
            winner = candidate
        }
    }

    return (bestScore < threshold) ? null : winner
}
1044 | // </FUZZY MATCHING> |
---|
1045 | |
---|
1046 | } |
---|