Changeset 843

Show
Ignore:
Timestamp:
13-07-12 14:50:59 (22 months ago)
Author:
hailiang.mei@…
Message:

more fixes.

Location:
trunk/code/conceptwiki/imports
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • trunk/code/conceptwiki/imports/imports-common/src/main/java/nl/nbic/conceptwiki/imports/common/AbstractConceptImporter.java

    r842 r843  
    309309//            } 
    310310             
    311              
    312311            final Concept goalTag = resolveReferences(tag); 
    313312            Concept toBeUpdatedConcept = getConcept(goalTag); 
     
    447446                        getConcept(resolveReferences(triple.getSubject())), 
    448447                        getConcept(resolveReferences(triple.getPredicate())), 
    449                         getConcept(resolveReferences(triple.getObject())), importBranch); 
     448                        getConcept(resolveReferences(triple.getObject())), CONCEPTWIKI_BRANCH); 
    450449            } 
    451450 
  • trunk/code/conceptwiki/imports/imports-swissprot/src/main/java/nl/nbic/conceptwiki/imports/swissprot/SwissProtImporter.java

    r842 r843  
    179179    protected Concept getConcept(final Concept concept) { 
    180180         
    181         // get it if it is a specific gene from SwissProt 
    182         if (isGeneOrGenome(concept) && hasNotationSource(concept, swissProtSource)) { 
    183             return getSingleConceptInSwissProtUsingNotationTag(concept, geneSemanticType); 
    184         } 
    185          
    186         // get it if it is a specific protein from SwissProt 
    187         if (isProteinOrAminoAcid(concept) && hasNotationSource(concept, swissProtSource)) { 
    188             return getSingleConceptInSwissProtUsingNotationTag(concept, proteinSemanticType); 
    189         } 
     181        // get it if it is a specific gene or protein from SwissProt 
     182        if (hasNotationSource(concept, swissProtSource)) { 
     183            Concept tag = null; 
     184            if (isGeneOrGenome(concept)) { 
     185                tag = geneSemanticType; 
     186            } 
     187             
     188            if (isProteinOrAminoAcid(concept)) { 
     189                tag = proteinSemanticType; 
     190            } 
     191             
     192            if (tag != null) { 
     193                return getSingleConceptInSwissProtUsingNotationTag(concept, tag); 
     194            } 
     195        } 
     196//         
     197//        if (isGeneOrGenome(concept) && hasNotationSource(concept, swissProtSource)) { 
     198//            return getSingleConceptInSwissProtUsingNotationTag(concept, geneSemanticType); 
     199//        } 
     200//         
     201//        // get it if it is a specific protein from SwissProt 
     202//        if (isProteinOrAminoAcid(concept) && hasNotationSource(concept, swissProtSource)) { 
     203//            return getSingleConceptInSwissProtUsingNotationTag(concept, proteinSemanticType); 
     204//        } 
    190205         
    191206        // get it if it is an UMLS concept using UMLS notation 
     
    193208        if (umlsConcept != null) { 
    194209            return umlsConcept; 
    195         }        
     210        } 
     211         
     212        // We can't resolve the SwissProt source concept in the SwissProt branch using 
     213        // getSingleConceptUsingLabel, because that would cause disambiguation problems with 
     214        // other SwissProt concepts, so we do this in the following hardcoded way. 
     215        if (SWISSPROT_SOURCE.equals(conceptUtility.getPreferredLabel(concept, LANG_EN).getText())) { 
     216            return swissProtSource; 
     217        } 
    196218         
    197219        // get it if there is a match on preferred label 
    198         return getSingleConceptUsingPreferredLabel(conceptUtility.getPreferredLabel(concept, LANG_EN));  
     220        return getSingleConceptUsingLabel(conceptUtility.getPreferredLabel(concept, LANG_EN));  
    199221    } 
    200222     
     
    354376     
    355377    /** 
    356      * Within ConceptWiki, search for the concept with a specific preferred label in an ordered list of branches. 
     378     * Within ConceptWiki, search for the concept with a specific label in an ordered list of branches. 
    357379     * #1 ConceptWiki branch: for generic gene/proteins  
    358380     * #2 UMLS branch: for source and semantic types 
     
    361383     * @return the concept if found, otherwise null 
    362384     */ 
    363     private Concept getSingleConceptUsingPreferredLabel(@NotNull final Label label) { 
     385    private Concept getSingleConceptUsingLabel(@NotNull final Label label) { 
    364386                 
    365387        // get it if there is a match on preferred label in ConceptWiki branch, e.g. generic protein, genes. 
    366         Concept result = getSingleConceptUsingPreferredLabelTagBranch(label, null, CONCEPTWIKI_BRANCH); 
     388        Concept result = getSingleConceptUsingLabelTagBranch(label, null, CONCEPTWIKI_BRANCH); 
    367389         
    368390        if (result != null) { 
     
    371393         
    372394        // get it if there is a match on preferred label in UMLS branch, e.g. Gene Or Genome 
    373         result = getSingleConceptUsingPreferredLabelTagBranch(label, null, UMLS_BRANCH); 
     395        result = getSingleConceptUsingLabelTagBranch(label, null, UMLS_BRANCH); 
    374396         
    375397        if (result != null) { 
     
    377399        } 
    378400        
    379         // get it if there is a match on preferred label in SwissProt branch, e.g. the SwissProt source 
    380         return getSingleConceptUsingPreferredLabelTagBranch(label, null, SWISSPROT_BRANCH);  
    381     } 
    382      
    383     /** 
    384      * Within ConceptWiki, search for the concept with a specific preferred label and a specific tag and in a specific branch. 
     401        // get it if there is a match on preferred label in SwissProt branch; this is really the last chance, and it 
     402        // may lead to ambiguous results. 
     403        return getSingleConceptUsingLabelTagBranch(label, null, SWISSPROT_BRANCH); 
     404    } 
     405     
     406    /** 
     407     * Within ConceptWiki, search for the concept with a specific label and a specific tag and in a specific branch. 
    385408     * There should be exactly one result or none, so more than one search result will result in an exception. 
    386409     * @param label a defined label 
     
    389412     * @return the concept if found, otherwise null 
    390413     */ 
    391     private Concept getSingleConceptUsingPreferredLabelTagBranch(@NotNull final Label label, final Concept tag, final int branchId) { 
     414    private Concept getSingleConceptUsingLabelTagBranch(@NotNull final Label label, final Concept tag, final int branchId) { 
    392415         
    393416        Concept resultConcept = null; 
  • trunk/code/conceptwiki/imports/imports-swissprot/src/main/java/nl/nbic/conceptwiki/imports/swissprot/SwissProtReader.java

    r842 r843  
    398398         
    399399        // Some bootstrap concepts should be made as tags. 
    400         final Set<BootstrapConceptEnvelop> bootstrapTags = Sets.newHashSet();         
    401         bootstrapTags.add(new BootstrapConceptEnvelopImpl(swissProtSource, SWISSPROT_BRANCH)); 
    402         bootstrapTags.add(new BootstrapConceptEnvelopImpl(proteinSemanticType, UMLS_BRANCH)); 
    403         bootstrapTags.add(new BootstrapConceptEnvelopImpl(geneSemanticType, UMLS_BRANCH)); 
    404         bootstrapTags.add(new BootstrapConceptEnvelopImpl(species, CONCEPTWIKI_BRANCH)); 
    405         bootstrapTags.add(new BootstrapConceptEnvelopImpl(nonSpeciesSpecificType, CONCEPTWIKI_BRANCH)); 
     400        final Set<BootstrapConceptEnvelop> bootstrapTags = loadBootstrapTagsFromBoostrapConcepts(); 
    406401         
    407402        final Map<String, BootstrapConceptEnvelop> bootstrapGenericProteinTags = Maps.newHashMap(); 
     
    455450         
    456451        return bootstrapTags.iterator(); 
     452    } 
     453     
     454    /** 
     455     * Loads the bootstrap tags from the bootstrap concepts that are tags as well. 
     456     * @return a set of bootstrap tags  
     457     */ 
     458    private Set<BootstrapConceptEnvelop> loadBootstrapTagsFromBoostrapConcepts() { 
     459         
     460        final Set<BootstrapConceptEnvelop> bootstrapTags = Sets.newHashSet();         
     461//        bootstrapTags.add(new BootstrapConceptEnvelopImpl(swissProtSource, SWISSPROT_BRANCH)); 
     462        bootstrapTags.add(new BootstrapConceptEnvelopImpl(proteinSemanticType, UMLS_BRANCH)); 
     463        bootstrapTags.add(new BootstrapConceptEnvelopImpl(geneSemanticType, UMLS_BRANCH)); 
     464        bootstrapTags.add(new BootstrapConceptEnvelopImpl(species, CONCEPTWIKI_BRANCH)); 
     465        bootstrapTags.add(new BootstrapConceptEnvelopImpl(nonSpeciesSpecificType, CONCEPTWIKI_BRANCH)); 
     466         
     467        return bootstrapTags; 
    457468    } 
    458469         
     
    10041015            } 
    10051016        } 
     1017         
     1018        final Concept speciesConcept = allSpeciesConcepts.get(organism); 
     1019         
     1020        if (speciesConcept == null) { 
     1021            throw new RuntimeException("species unknown: " + organism + ". Please check the list of species."); 
     1022        } 
    10061023 
    10071024        // add protein species triple 
    1008         triples.add(new TripleImpl(null, allProteinConcepts.get(primaryAccession), hasSpeciesPredicate, allSpeciesConcepts.get(organism))); 
     1025        triples.add(new TripleImpl(null, allProteinConcepts.get(primaryAccession), hasSpeciesPredicate, speciesConcept)); 
    10091026         
    10101027        // add gene triple if any 
    10111028        final Set<Label> geneLabels = getGeneLabelsFromEntry(organism, entry); 
    10121029         
    1013         if (geneLabels != null) { 
    1014              
     1030        if (geneLabels != null) {             
    10151031            // add gene species triple 
    1016             triples.add(new TripleImpl(null, allGeneConcepts.get(primaryAccession), hasSpeciesPredicate, allSpeciesConcepts.get(organism))); 
     1032            triples.add(new TripleImpl(null, allGeneConcepts.get(primaryAccession), hasSpeciesPredicate, speciesConcept)); 
    10171033        } 
    10181034         
  • trunk/code/conceptwiki/imports/imports-swissprot/src/test/java/nl/nbic/conceptwiki/imports/swissprot/SwissProtReaderUnitTest.java

    r842 r843  
    9696        } 
    9797         
    98         // There are 14 bootstrap tags (5 proteins, 5 genes, 5 general bootstrap concepts) defined in SwissProt. 
    99         assertEquals(15, count); 
     98        // There are 14 bootstrap tags (5 proteins, 5 genes, 4 general bootstrap concepts) defined in SwissProt. 
     99        assertEquals(14, count); 
    100100    } 
    101101