Changeset 61


Ignore:
Timestamp:
Aug 11, 2010, 4:14:54 PM (10 years ago)
Author:
j@…
bzr:base-revision:
j@dannynavarro.net-20100811130247-m5vcvd0g77y9h9ko
bzr:committer:
Danny Navarro <j@dannynavarro.net>
bzr:file-ids:

mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
bzr:mapping-version:
v4
bzr:repository-uuid:
724254b2-fbe6-419d-9466-c04ef4c9d29d
bzr:revision-id:
j@dannynavarro.net-20100811131617-5n3z2dsngary27hz
bzr:revno:
61
bzr:revprop:branch-nick:
trunk
bzr:root:
trunk
bzr:timestamp:
2010-08-11 15:16:17.500000000 +0200
bzr:user-agent:
bzr2.1.2+bzr-svn1.0.3
svn:original-date:
2010-08-11T13:16:17.500000Z
Message:

Added fasta parse function

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/mzcms/parsers.py

    r59 r61  
    264264        writer.writerow(psm)
    265265
     266def parse_fasta(fasta_path, proteins):
     267    # XXX: Improve parser, don't seek back
     268    """Update protein sequence container from fasta file"""
     269    fasta_file = open(fasta_path, 'rb')
     270    while True:
     271        line = fasta_file.readline().decode('utf-8').strip()
     272        if line.startswith('>IPI:IPI'):
     273            seq_lines = list()
     274            prot_id = line.split('|')[0][5:]
     275            while True:
     276                try:
     277                    line = fasta_file.readline().decode('utf-8').strip()
     278                except StopIteration:
     279                    return
     280                if not line.startswith('>'):
     281                    seq_lines.append(line)
     282                    last_line = fasta_file.tell()
     283                else:
     284                    sequence = ''.join(seq_lines)
     285                    if prot_id in proteins:
     286                        #proteins[prot_id].sequence == 'TBI':
     287                        proteins[prot_id].sequence = sequence
     288                        fasta_file.seek(last_line)
     289                    break
     290
    266291# XXX: Use better defaults for containers and factories
    267292def parse_dats(dats_dir, proteins_container=dict, peptides_container=dict,
     
    281306    import transaction
    282307    for root, dirs, files in os.walk(dats_dir):
    283         for dat_fn in files:
    284             if fnmatch.fnmatch(dat_fn, '*.dat'):
    285                 with open(os.path.join(root, dat_fn)) as dat_file:
     308        for fn in files:
     309            if fnmatch.fnmatch(fn, '*.dat'):
     310                with open(os.path.join(root, fn)) as dat_file:
    286311                    ser_proteins, ser_peptides, \
    287312                    ser_spectra, ser_psms = dat_parser.parse(dat_file)
     
    300325                            psms[str(psm_id)] = psm
    301326                    transaction.savepoint()
     327            elif fnmatch.fnmatch(fn, '*.fasta'):
     328                fasta_path = os.path.join(root, fn)
     329    # XXX: Improve this function, has to return something
     330    try:
     331        parse_fasta(fasta_path, proteins)
     332    except UnboundLocalError:
     333        print("You must have the fasta database in the data to parse "
     334              "directory")
     335
    302336
    303337def main():
Note: See TracChangeset for help on using the changeset viewer.