Changeset 21 for trunk/grails-app/services/nl
- Timestamp:
- Mar 22, 2011, 3:04:40 PM (12 years ago)
- Location:
- trunk/grails-app/services/nl/tno/metagenomics
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/grails-app/services/nl/tno/metagenomics/SampleExcelService.groovy
r19 r21 85 85 if( !includeRun ) 86 86 fields = fields - runName 87 87 88 89 // Do matching using fuzzy search. The 0.8 treshold makes sure that no match if chosen if 90 // there is actually no match at all. 91 def matches = fuzzySearchService.mostSimilarUnique( headers, possibleFields, 0.8 ); 92 88 93 headers.eachWithIndex { header, idx -> 89 // Do matching using fuzzy search. The 0.8 treshold makes sure that no match if chosen if 90 // there is actually no match at all. 91 if( !header || header.toString().trim() == "" ) 92 bestMatches[idx] = null 93 else 94 bestMatches[idx] = fuzzySearchService.mostSimilar( header, possibleFields, 0.8 ); 94 bestMatches[idx] = matches[idx].candidate; 95 95 } 96 96 -
trunk/grails-app/services/nl/tno/metagenomics/imports/FuzzySearchService.groovy
static transactional = true

/**
 * Matches the patterns with the candidates and returns the best candidate for every pattern,
 * while making sure that each candidate is handed out at most once.
 *
 * All pattern/candidate pairs scoring at or above the threshold are collected and then assigned
 * greedily from the highest score downwards: a pair is accepted only if neither its pattern
 * position nor its candidate has been used by a better-scoring pair.
 *
 * Patterns are tracked by their position in the list, so two equal patterns are treated as
 * separate entries and may each receive a (different) candidate. Patterns that are null or
 * blank are never scored and always end up with a null candidate.
 *
 * @param patterns		List with patterns to search for
 * @param candidates	List with candidates to search in
 * @param treshold		Threshold; only pairs with a similarity score >= this value are considered
 * @return				List with one map per pattern, in the same order as the patterns, of the form
 * 						[ 'pattern': <pattern>, 'candidate': <chosen candidate or null> ]
 */
static def mostSimilarUnique( patterns, candidates, treshold ) {
	def matches = []

	// Collect every pattern/candidate pair that scores at or above the threshold
	patterns.eachWithIndex { pattern, idx ->
		// Nothing sensible can be matched against a missing or blank pattern
		if( pattern == null || pattern.toString().trim() == "" )
			return

		candidates.each { candidate ->
			def score = stringSimilarity( pattern, candidate );
			if( score >= treshold )
				matches << [ 'index': idx, 'pattern': pattern, 'candidate': candidate, 'score': score ];
		}
	}

	// Sort the list on descending score. The closure variant of sort is used on purpose: it always
	// returns the sorted list, whereas sort( Comparator ) may resolve to java.util.List.sort
	// (which returns nothing) on newer Java runtimes.
	matches = matches.sort { a, b -> b.score <=> a.score }

	// Loop through the scores and select the best match for every pattern position
	def results = patterns.collect { [ 'pattern': it, 'candidate': null ] }
	def selectedCandidates = [] as Set
	def filledIndexes = [] as Set

	matches.each { match ->
		if( !filledIndexes.contains( match.index ) && !selectedCandidates.contains( match.candidate ) ) {
			results[ match.index ].candidate = match.candidate;

			selectedCandidates << match.candidate;
			filledIndexes << match.index;
		}
	}

	return results
}

// classes for fuzzy string matching
Note: See TracChangeset
for help on using the changeset viewer.