Changeset 27


Ignore:
Timestamp:
Aug 11, 2010, 4:05:09 PM (10 years ago)
Author:
j@…
bzr:base-revision:
j@dannynavarro.net-20100806120031-x0349ul02lytyb8h
bzr:committer:
Danny Navarro <j@dannynavarro.net>
bzr:file-ids:

mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
bzr:mapping-version:
v4
bzr:repository-uuid:
724254b2-fbe6-419d-9466-c04ef4c9d29d
bzr:revision-id:
j@dannynavarro.net-20100806125108-r2yigtsr1ksh7h7e
bzr:revno:
27
bzr:revprop:branch-nick:
trunk
bzr:root:
trunk
bzr:timestamp:
2010-08-06 14:51:08.160000086 +0200
bzr:user-agent:
bzr2.1.2+bzr-svn1.0.3
svn:original-date:
2010-08-06T12:51:08.160000Z
Message:

Specified default regexes in DatParser (scan number, raw file name, decoy, contaminant) for parsing MGF data.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/mzcms/parsers.py

    r26 r27  
    66import re
    77import csv
     8import urllib
    89from optparse import OptionParser
    910from string import Template
     
    3031                        )
    3132#fraction_regex = re.compile(r'fraction%3a%20(\d+)')
    32 RAWFN_REGEX = re.compile(r'raw%3a%20(.+raw)')
    3333
    3434class DatParser(object):
    3535    """Mascot Dat Parser
    3636    """
    37     def __init__(self, decoy_str, scans_in_title=False):
     37    def __init__(self,
     38                 scan_str=r'FinneganScanNumber%3a%20(\d+)',
     39                 rawfn_str=r'RawFile%3a%20(.+raw)',
     40                 decoy_str=r'^IPI:REV_:IPI',
     41                 contaminant_str=r'^IPI:CON_:IPI'
     42                 ):
     43        self.scan_regex = re.compile(scan_str)
    3844        self.decoy_regex = re.compile(decoy_str)
    39         self.scans_in_title = scans_in_title
    40         if scans_in_title:
    41             self.scan_regex = re.compile(r'scan%3a%20(\d+)')
     45        self.rawfn_regex = re.compile(rawfn_str)
     46        self.contaminant_regex = re.compile(contaminant_str)
    4247
    4348    def _parse_spectra_fn(self, datfile):
     
    115120                #current_id_mgf["Fraction"] = fraction
    116121                try:
    117                     rawfn = re.search(RAWFN_REGEX, line).group(1)
     122                    quoted_rawfn = re.search(rawfn_regex, line).group(1)
    118123                except AttributeError:
    119124                    sys.exit("It seems there is no raw: field in TITLE")
    120                 rawfn = rawfn.replace('%2e', '.')
     125                rawfn = urllib.unquote(quoted_rawfn)
    121126                current_id_mgf["RawFile"] = rawfn
    122127                if self.scans_in_title:
     
    196201    spectra = spectra_factory()
    197202    psms = psms_factory()
    198     dat_parser = DatParser(decoy_str, scans_in_title)
     203    dat_parser = DatParser()
    199204    for dat_path in dat_paths:
    200205        with open(dat_path) as dat_file:
     
    226231                      "--decoy-str",
    227232                      dest="decoy_str",
    228                       default="^IPI:REV_:IPI",
     233                      default=r'^IPI:REV_:IPI',
    229234                      help="1 to parse the scan number from the TITLE field, "
    230235                           "0 to parse the scan number from the SCAN field "
Note: See TracChangeset for help on using the changeset viewer.