Changeset 1126


Ignore:
Timestamp:
Nov 12, 2010, 12:08:07 PM (6 years ago)
Author:
t.w.abma@…
Message:
  • initial fuzzy logic incorporated in Importer service
Location:
trunk/grails-app
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/grails-app/controllers/dbnp/importer/ImporterController.groovy

    r1122 r1126  
    3939
    4040    def index = {
     41        //String.metaClass.mostSimilarTo = { ImporterService.mostSimilar(delegate, it) }
     42
    4143        //session.import_referer = request.forwardURI
    4244        // should do a check what is in the url, strip it?
  • trunk/grails-app/services/dbnp/importer/ImporterService.groovy

    r1122 r1126  
    507507    }
    508508
     509    // classes for fuzzy string matching
     510    // <FUZZY MATCHING>
     511    static def similarity(l_seq, r_seq, degree=2) {
     512        def l_histo = countNgramFrequency(l_seq, degree)
     513        def r_histo = countNgramFrequency(r_seq, degree)
     514
     515        dotProduct(l_histo, r_histo) /
     516        Math.sqrt(dotProduct(l_histo, l_histo) *
     517            dotProduct(r_histo, r_histo))
     518    }
     519
     520    static def countNgramFrequency(sequence, degree) {
     521        def histo = [:]
     522        def items = sequence.size()
     523
     524        for (int i = 0; i + degree <= items; i++)
     525        {
     526            def gram = sequence[i..<(i + degree)]
     527            histo[gram] = 1 + histo.get(gram, 0)
     528        }
     529        histo
     530    }
     531
     532    static def dotProduct(l_histo, r_histo) {
     533        def sum = 0
     534        l_histo.each { key, value ->
     535            sum = sum + l_histo[key] * r_histo.get(key, 0)
     536        }
     537        sum
     538    }
     539
     540    static def stringSimilarity (l_str, r_str, degree=2) {
     541        similarity(l_str.toLowerCase().toCharArray(),
     542            r_str.toLowerCase().toCharArray(),
     543            degree)
     544    }
     545
     546    static def mostSimilar(pattern, candidates, threshold=0) {
     547        def topScore = 0
     548        def bestFit = null
     549
     550        candidates.each { candidate ->
     551            def score = stringSimilarity(pattern, candidate)
     552            if (score > topScore) {
     553                topScore = score
     554                bestFit = candidate
     555            }
     556        }
     557
     558        if (topScore < threshold)
     559        bestFit = null
     560
     561        bestFit
     562    }
     563    // </FUZZY MATCHING>
     564
    509565}
Note: See TracChangeset for help on using the changeset viewer.