Changeset 1126
- Timestamp:
- Nov 12, 2010, 12:08:07 PM (10 years ago)
- Location:
- trunk/grails-app
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/grails-app/controllers/dbnp/importer/ImporterController.groovy
r1122 r1126 39 39 40 40 def index = { 41 //String.metaClass.mostSimilarTo = { ImporterService.mostSimilar(delegate, it) } 42 41 43 //session.import_referer = request.forwardURI 42 44 // should do a check what is in the url, strip it? -
trunk/grails-app/services/dbnp/importer/ImporterService.groovy
// classes for fuzzy string matching
// <FUZZY MATCHING>

/**
 * Cosine similarity between the n-gram histograms of two sequences.
 *
 * @param l_seq  first sequence (anything supporting size() and range indexing)
 * @param r_seq  second sequence
 * @param degree length of the n-grams to compare (default 2, i.e. bigrams)
 * @return a score between 0 (no n-gram in common) and 1 (identical n-gram
 *         distribution); 0 when either sequence is too short to yield an n-gram
 */
static def similarity(l_seq, r_seq, degree=2) {
    def l_histo = countNgramFrequency(l_seq, degree)
    def r_histo = countNgramFrequency(r_seq, degree)

    def norm = Math.sqrt(dotProduct(l_histo, l_histo) *
                         dotProduct(r_histo, r_histo))

    // A sequence shorter than `degree` has an empty histogram and therefore a
    // zero norm; dividing by it would produce NaN, so report "no similarity".
    if (norm == 0)
        return 0.0d

    dotProduct(l_histo, r_histo) / norm
}

/**
 * Counts how often every n-gram of the given length occurs in a sequence.
 *
 * @param sequence sequence to scan; null is treated as empty
 * @param degree   n-gram length
 * @return map from n-gram (the sub-range of the sequence) to its occurrence count
 */
static def countNgramFrequency(sequence, degree) {
    def histo = [:]
    if (sequence == null)
        return histo

    def items = sequence.size()

    for (int i = 0; i + degree <= items; i++)
    {
        def gram = sequence[i..<(i + degree)]
        histo.put(gram, 1 + (histo.get(gram) ?: 0))
    }
    histo
}

/**
 * Dot product of two histograms, treating keys missing from either map as 0.
 * Neither map is modified.
 *
 * @param l_histo first histogram (key -> count)
 * @param r_histo second histogram (key -> count)
 * @return sum over all shared keys of the product of their counts
 */
static def dotProduct(l_histo, r_histo) {
    def sum = 0
    l_histo.each { key, value ->
        // Single-argument get(): Groovy's get(key, default) would insert the
        // default into r_histo as a side effect.
        def other = r_histo.get(key)
        if (other != null)
            sum = sum + value * other
    }
    sum
}

/**
 * Case-insensitive n-gram similarity of two strings.
 *
 * @param l_str  first string
 * @param r_str  second string
 * @param degree n-gram length (default 2)
 * @return similarity score as computed by similarity(); 0 when either string is null
 */
static def stringSimilarity (l_str, r_str, degree=2) {
    if (l_str == null || r_str == null)
        return 0.0d

    // Fixed locale so the score does not depend on the JVM default locale.
    similarity(l_str.toLowerCase(Locale.ENGLISH).toCharArray(),
               r_str.toLowerCase(Locale.ENGLISH).toCharArray(),
               degree)
}

/**
 * Picks the candidate that is most similar to the pattern.
 *
 * @param pattern    string to match
 * @param candidates strings to choose from
 * @param threshold  minimum score the best candidate must reach (default 0)
 * @return the best-scoring candidate, or null when no candidate scores above 0
 *         or the best score is below the threshold
 */
static def mostSimilar(pattern, candidates, threshold=0) {
    def topScore = 0
    def bestFit = null

    candidates?.each { candidate ->
        def score = stringSimilarity(pattern, candidate)
        // Strictly greater: on a tie the earliest candidate wins.
        if (score > topScore) {
            topScore = score
            bestFit = candidate
        }
    }

    topScore < threshold ? null : bestFit
}
// </FUZZY MATCHING>
Note: See TracChangeset
for help on using the changeset viewer.