Changeset 40


Ignore:
Timestamp:
May 7, 2009, 7:22:31 PM (11 years ago)
Author:
pvkouteren
Message:

run_imports contains an example of importing through the Importer class.
utils contains the Importer class.
parserfactory added for on-the-fly checking of constraints and requirements.
genericparser added as an interface for inheriting classes, guaranteeing cooperation with parserfactory.
gmt, msigdb and obo files implement this interface already.

Files:
4 added
3 edited

Legend:

Unmodified
Added
Removed
  • parsers/obo.py

    r21 r40  
    11import logging
    2 
     2import genericparser
    33import container
    44from parsers import utils
    55
    6 class OboParser:
    7     def __init__(self, filename):
     6class OboParser(genericparser.GenericParser):
     7    def __init__(self):
    88        """Init function. Sets up the record_types, truth and function mappers."""
    9 
    10         self.filename = filename
    119       
    1210        self.multi_fields = ['is_a', 'synonym', 'xref']
     
    2624        field_func['relationship'] = lambda x: x
    2725        field_func['xref'] = lambda x: x
     26        self.fileformats = []
     27        self.fileformats.append("obo")
    2828
    2929        self.field_func = field_func
    3030
     31    def getSupportedFileFormats(self):
     32        return self.fileformats
     33   
    3134    def _find_records(self):
    3235        """Splits file into records, does no further parsing."""
     
    103106            defaults=defaults)
    104107
    105     def parse(self):
     108    def parse(self, filename):
     109        self.filename = filename
    106110        """Function to be called to start parsing the file specified in init."""
    107111        raw_recs = self._find_records()
  • parsers/utils.py

    r17 r40  
    22import threading
    33import urllib
     4import parsers.parserfactory
     5import container
    46
    5 import container
     7"""
     8The importer class can import multiple files. It uses the ParserFactory to determine the parsing order and
     9possibly the parsers for files if they are not provided / invalid.
     10"""
     11class Importer:
     12    def __init__(self):
     13        self.parserfactory = parsers.parserfactory.ParserFactory()
     14   
     15   
     16    def importSources(self, sources):
     17        if isinstance(sources, str):
     18            # we get a string: this is just one source!
     19            s = []
     20            s.append(sources)
     21            self.parseList(s)
     22        else:
     23            self.parseList(sources)
     24           
     25    """
     26    Parse a list of files. This means that we not only have to check the prerequisites, but also an order in which to
     27    parse the files as a file can be a prerequisite for another file
     28    """
     29    def parseList(self, filelist):
     30        print "Finding parse order and appropriate parser(s) for the file(s).."
     31        file_parser = self.parserfactory.findParseOrder(filelist)
     32        if len(file_parser) > 0:
     33            print "Parse order and parsers determined. Starting parsing.."
     34            for file, parser in file_parser:
     35                print "About to parse file " + file
     36                parser.parse(file)
     37            print "Done parsing!"
     38        else:
     39            print "The specified list could not be parsed."
     40           
     41           
     42    """
     43    def parse(self, file, parser=None):
     44        # First check if all prerequisites are present
     45        errors = self.checkPrerequisites(file_prerequisites)
     46        if not empty(errors):
     47            data = "\n".join(errors)
     48            raise prerequisitesError, data
     49        else:
     50            if not parser:
     51                parser = self.findParser(file)
     52            else:
     53                pass
     54            if parser:
     55                self.doParsing(file, parser)
     56            else:
     57                raise parserError
     58    """
     59           
    660
    761class Downer(threading.Thread):
  • run_imports.py

    r4 r40  
    11import container
    2 import parsers.fasta
    32
    4 c = parsers.fasta.get_default_dbSNP_container()
    5 m = parsers.fasta.get_default_dbSNP_functions()
    6 f = parsers.fasta.fasta("/home/jan/data/dbSNP/rs_ch22.fas", c, m)
    7 f.parse()
    8 print c
     3# Fasta parser test
     4#import parsers.fasta
     5#c = parsers.fasta.get_default_dbSNP_container()
     6#m = parsers.fasta.get_default_dbSNP_functions()
     7#f = parsers.fasta.fasta("/home/jan/data/dbSNP/rs_ch22.fas", c, m)
     8#f.parse()
     9#print c
     10
     11# GMTParser test
     12#import parsers.gmt
     13#p1 = parsers.gmt.GMTParser("/home/patrick/workspace/msigdb_gmt.gmt")
     14#c = p1.parse()
     15#print c
     16
     17# MSigDBParser test
     18#import parsers.msigdb
     19#p2 = parsers.msigdb.MSigDBXMLParser()
     20#c = p2.parse("/home/patrick/workspace/msigdb_v2.5.xml")
     21#print c
     22"""
     23# Testing MSigDB: Print certain category
     24for dict in c:
     25    if dict['category_code'] == "c5":
     26        print dict
     27"""
     28
     29# ParserFactory test
     30#import parsers.parserfactory
     31#pf = parsers.parserfactory.ParserFactory()
     32#pf.getAvailableParsers()
     33#print pf.getSupportedFileFormats()
     34
     35# Importer test
     36import parsers.utils
     37
     38todo = []
     39#item = ("/home/patrick/workspace/msigdb_v2.5.xml", 'parsers.msigdb.MSigDBXMLParser')
     40item = ("/home/patrick/workspace/msigdb_v2.5.xml", '')
     41todo.append(item)
     42#item = ("/home/patrick/workspace/psi-mi.obo", 'parsers.obo.OboParser')
     43item = ("/home/patrick/workspace/psi-mi.obo", '')
     44todo.append(item)
     45i = parsers.utils.Importer()
     46i.importSources(todo)
     47
     48"""
     49Database connection testing
     50
     51from db import postgres
     52from container import ibidas
     53
     54conf = config.loadConfig('ibidas.ini', _ConfigDefault)
     55conn = postgres.openConnection(conf['database.host'],\
     56         conf['database.port'],conf['database.name'],\
     57         conf['database.user'],conf['database.password'])
     58self.db = ibidas.load_ibidas(conn)
     59"""
Note: See TracChangeset for help on using the changeset viewer.