Changeset 843


Ignore:
Timestamp:
Jul 13, 2012, 2:50:59 PM (4 years ago)
Author:
hailiang.mei@…
Message:

more fixes.

Location:
trunk/code/conceptwiki/imports
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/code/conceptwiki/imports/imports-common/src/main/java/nl/nbic/conceptwiki/imports/common/AbstractConceptImporter.java

    r842 r843  
    309309//            }
    310310           
    311            
    312311            final Concept goalTag = resolveReferences(tag);
    313312            Concept toBeUpdatedConcept = getConcept(goalTag);
     
    447446                        getConcept(resolveReferences(triple.getSubject())),
    448447                        getConcept(resolveReferences(triple.getPredicate())),
    449                         getConcept(resolveReferences(triple.getObject())), importBranch);
     448                        getConcept(resolveReferences(triple.getObject())), CONCEPTWIKI_BRANCH);
    450449            }
    451450
  • trunk/code/conceptwiki/imports/imports-swissprot/src/main/java/nl/nbic/conceptwiki/imports/swissprot/SwissProtImporter.java

    r842 r843  
    179179    protected Concept getConcept(final Concept concept) {
    180180       
    181         // get it if it is a specific gene from SwissProt
    182         if (isGeneOrGenome(concept) && hasNotationSource(concept, swissProtSource)) {
    183             return getSingleConceptInSwissProtUsingNotationTag(concept, geneSemanticType);
    184         }
    185        
    186         // get it if it is a specific protein from SwissProt
    187         if (isProteinOrAminoAcid(concept) && hasNotationSource(concept, swissProtSource)) {
    188             return getSingleConceptInSwissProtUsingNotationTag(concept, proteinSemanticType);
    189         }
     181        // get it if it is a specific gene or protein from SwissProt
     182        if (hasNotationSource(concept, swissProtSource)) {
     183            Concept tag = null;
     184            if (isGeneOrGenome(concept)) {
     185                tag = geneSemanticType;
     186            }
     187           
     188            if (isProteinOrAminoAcid(concept)) {
     189                tag = proteinSemanticType;
     190            }
     191           
     192            if (tag != null) {
     193                return getSingleConceptInSwissProtUsingNotationTag(concept, tag);
     194            }
     195        }
     196//       
     197//        if (isGeneOrGenome(concept) && hasNotationSource(concept, swissProtSource)) {
     198//            return getSingleConceptInSwissProtUsingNotationTag(concept, geneSemanticType);
     199//        }
     200//       
     201//        // get it if it is a specific protein from SwissProt
     202//        if (isProteinOrAminoAcid(concept) && hasNotationSource(concept, swissProtSource)) {
     203//            return getSingleConceptInSwissProtUsingNotationTag(concept, proteinSemanticType);
     204//        }
    190205       
    191206        // get it if it is an UMLS concept using UMLS notation
     
    193208        if (umlsConcept != null) {
    194209            return umlsConcept;
    195         }       
     210        }
     211       
     212        // We can't resolve the SwissProt source concept in the SwissProt branch using
     213        // getSingleConceptUsingLabel, because it would cause disambiguation problems with other
     214        // SwissProt concepts, so we match it by its preferred label in the following hardcoded way.
     215        if (SWISSPROT_SOURCE.equals(conceptUtility.getPreferredLabel(concept, LANG_EN).getText())) {
     216            return swissProtSource;
     217        }
    196218       
    197219        // get it if there is a match on preferred label
    198         return getSingleConceptUsingPreferredLabel(conceptUtility.getPreferredLabel(concept, LANG_EN));
     220        return getSingleConceptUsingLabel(conceptUtility.getPreferredLabel(concept, LANG_EN));
    199221    }
    200222   
     
    354376   
    355377    /**
    356      * Within ConceptWiki, search for the concept with a specific preferred label in an ordered list of branches.
     378     * Within ConceptWiki, search for the concept with a specific label in an ordered list of branches.
    357379     * #1 ConceptWiki branch: for generic gene/proteins
    358380     * #2 UMLS branch: for source and semantic types
     
    361383     * @return the concept if found, otherwise null
    362384     */
    363     private Concept getSingleConceptUsingPreferredLabel(@NotNull final Label label) {
     385    private Concept getSingleConceptUsingLabel(@NotNull final Label label) {
    364386               
    365387        // get it if there is a match on preferred label in ConceptWiki branch, e.g. generic protein, genes.
    366         Concept result = getSingleConceptUsingPreferredLabelTagBranch(label, null, CONCEPTWIKI_BRANCH);
     388        Concept result = getSingleConceptUsingLabelTagBranch(label, null, CONCEPTWIKI_BRANCH);
    367389       
    368390        if (result != null) {
     
    371393       
    372394        // get it if there is a match on preferred label in UMLS branch, e.g. Gene Or Genome
    373         result = getSingleConceptUsingPreferredLabelTagBranch(label, null, UMLS_BRANCH);
     395        result = getSingleConceptUsingLabelTagBranch(label, null, UMLS_BRANCH);
    374396       
    375397        if (result != null) {
     
    377399        }
    378400       
    379         // get it if there is a match on preferred label in SwissProt branch, e.g. the SwissProt source
    380         return getSingleConceptUsingPreferredLabelTagBranch(label, null, SWISSPROT_BRANCH);
    381     }
    382    
    383     /**
    384      * Within ConceptWiki, search for the concept with a specific preferred label and a specific tag and in a specific branch.
     401        // get it if there is a match on preferred label in the SwissProt branch. This is really the
     402        // last chance, and it may lead to ambiguous results.
     403        return getSingleConceptUsingLabelTagBranch(label, null, SWISSPROT_BRANCH);
     404    }
     405   
     406    /**
     407     * Within ConceptWiki, search for the concept with a specific label and a specific tag and in a specific branch.
    385408     * There should be exactly one result or none, so more than one search result will result in an exception.
    386409     * @param label a defined label
     
    389412     * @return the concept if found, otherwise null
    390413     */
    391     private Concept getSingleConceptUsingPreferredLabelTagBranch(@NotNull final Label label, final Concept tag, final int branchId) {
     414    private Concept getSingleConceptUsingLabelTagBranch(@NotNull final Label label, final Concept tag, final int branchId) {
    392415       
    393416        Concept resultConcept = null;
  • trunk/code/conceptwiki/imports/imports-swissprot/src/main/java/nl/nbic/conceptwiki/imports/swissprot/SwissProtReader.java

    r842 r843  
    398398       
    399399        // Some bootstrap concepts should be made as tags.
    400         final Set<BootstrapConceptEnvelop> bootstrapTags = Sets.newHashSet();       
    401         bootstrapTags.add(new BootstrapConceptEnvelopImpl(swissProtSource, SWISSPROT_BRANCH));
    402         bootstrapTags.add(new BootstrapConceptEnvelopImpl(proteinSemanticType, UMLS_BRANCH));
    403         bootstrapTags.add(new BootstrapConceptEnvelopImpl(geneSemanticType, UMLS_BRANCH));
    404         bootstrapTags.add(new BootstrapConceptEnvelopImpl(species, CONCEPTWIKI_BRANCH));
    405         bootstrapTags.add(new BootstrapConceptEnvelopImpl(nonSpeciesSpecificType, CONCEPTWIKI_BRANCH));
     400        final Set<BootstrapConceptEnvelop> bootstrapTags = loadBootstrapTagsFromBoostrapConcepts();
    406401       
    407402        final Map<String, BootstrapConceptEnvelop> bootstrapGenericProteinTags = Maps.newHashMap();
     
    455450       
    456451        return bootstrapTags.iterator();
     452    }
     453   
     454    /**
     455     * Loads the bootstrap tags with the bootstrap concepts that are tags as well.
     456     * @return a set of bootstrap tags
     457     */
     458    private Set<BootstrapConceptEnvelop> loadBootstrapTagsFromBoostrapConcepts() {
     459       
     460        final Set<BootstrapConceptEnvelop> bootstrapTags = Sets.newHashSet();       
     461//        bootstrapTags.add(new BootstrapConceptEnvelopImpl(swissProtSource, SWISSPROT_BRANCH));
     462        bootstrapTags.add(new BootstrapConceptEnvelopImpl(proteinSemanticType, UMLS_BRANCH));
     463        bootstrapTags.add(new BootstrapConceptEnvelopImpl(geneSemanticType, UMLS_BRANCH));
     464        bootstrapTags.add(new BootstrapConceptEnvelopImpl(species, CONCEPTWIKI_BRANCH));
     465        bootstrapTags.add(new BootstrapConceptEnvelopImpl(nonSpeciesSpecificType, CONCEPTWIKI_BRANCH));
     466       
     467        return bootstrapTags;
    457468    }
    458469       
     
    10041015            }
    10051016        }
     1017       
     1018        final Concept speciesConcept = allSpeciesConcepts.get(organism);
     1019       
     1020        if (speciesConcept == null) {
     1021            throw new RuntimeException("species unknown: " + organism + ". Please check the list of species.");
     1022        }
    10061023
    10071024        // add protein species triple
    1008         triples.add(new TripleImpl(null, allProteinConcepts.get(primaryAccession), hasSpeciesPredicate, allSpeciesConcepts.get(organism)));
     1025        triples.add(new TripleImpl(null, allProteinConcepts.get(primaryAccession), hasSpeciesPredicate, speciesConcept));
    10091026       
    10101027        // add gene triple if any
    10111028        final Set<Label> geneLabels = getGeneLabelsFromEntry(organism, entry);
    10121029       
    1013         if (geneLabels != null) {
    1014            
     1030        if (geneLabels != null) {           
    10151031            // add gene species triple
    1016             triples.add(new TripleImpl(null, allGeneConcepts.get(primaryAccession), hasSpeciesPredicate, allSpeciesConcepts.get(organism)));
     1032            triples.add(new TripleImpl(null, allGeneConcepts.get(primaryAccession), hasSpeciesPredicate, speciesConcept));
    10171033        }
    10181034       
  • trunk/code/conceptwiki/imports/imports-swissprot/src/test/java/nl/nbic/conceptwiki/imports/swissprot/SwissProtReaderUnitTest.java

    r842 r843  
    9696        }
    9797       
    98         // There are 14 bootstrap tags (5 proteins, 5 genes, 5 general bootstrap concepts) defined in SwissProt.
    99         assertEquals(15, count);
     98        // There are 14 bootstrap tags (5 proteins, 5 genes, 4 general bootstrap concepts) defined in SwissProt.
     99        assertEquals(14, count);
    100100    }
    101101   
Note: See TracChangeset for help on using the changeset viewer.