Changeset 43


Ignore:
Timestamp:
Aug 11, 2010, 4:09:41 PM (10 years ago)
Author:
j@…
bzr:base-revision:
j@dannynavarro.net-20100809132407-1fe2wb5blanid09e
bzr:committer:
Danny Navarro <j@dannynavarro.net>
bzr:file-ids:

mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
bzr:mapping-version:
v4
bzr:repository-uuid:
724254b2-fbe6-419d-9466-c04ef4c9d29d
bzr:revision-id:
j@dannynavarro.net-20100809140009-s4lr753fe9u4cqvu
bzr:revno:
43
bzr:revprop:branch-nick:
trunk
bzr:root:
trunk
bzr:timestamp:
2010-08-09 16:00:09.927000046 +0200
bzr:user-agent:
bzr2.1.2+bzr-svn1.0.3
svn:original-date:
2010-08-09T14:00:09.927000Z
Message:

Parse extra section of spectra

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/mzcms/parsers.py

    r42 r43  
    4646    """
    4747    def __init__(self,
    48                  scan_str=r'FinneganScanNumber%3a%20(\d+)',
     48                 scan_str=r'FinneganScanNumber%3a%20(\d+)%20',
    4949                 rawfn_str=r'RawFile%3a%20(.+raw)',
    5050                 decoy_str=r'^IPI:REV_:IPI',
    5151                 contaminant_str=r'^IPI:CON_:IPI'
    5252                 ):
    53         scan_regex = re.compile(scan_str)
     53        self.scan_regex = re.compile(scan_str)
    5454        decoy_regex = re.compile(decoy_str)
     55        contaminant_regex = re.compile(contaminant_str)
    5556        # Contaminants are not exactly decoy psms
    56         self.non_target_regexes = (scan_regex, decoy_regex)
     57        self.non_target_regexes = (contaminant_regex, decoy_regex)
    5758        self.rawfn_regex = re.compile(rawfn_str)
    58         self.contaminant_regex = re.compile(contaminant_str)
    5959
    6060    def _parse_spectra_fn(self, dat_file):
     
    9595                charge = int(r_charge.rstrip('+'))
    9696                summspec.append((mz, charge))
    97         return summspec
     97            elif line.strip() == SEPARATOR:
     98                return summspec
    9899
    99100    def _parse_psms(self, dat_file):
     
    133134                return nativeid_psms
    134135
    135     def _parse_spectra(self, dat_file):
     136    def _parse_extraspec(self, dat_file):
    136137        """Gets the data coming from the input file from Mascot dat
    137138           format
    138139        """
    139         nativeid_spectra = defaultdict(dict)
    140         nativeid = 1
    141         for line in dat_file:
    142             nativeid_str = ''.join(("query", str(nativeid)))
    143             query_line = SECTION_TEMPLATE.substitute(section=nativeid_str)
    144             if line.strip() == query_line:
    145                 current_id_mgf = nativeid_spectra[nativeid]
    146                 nativeid += 1
    147             elif line.startswith("title="):
     140        extraspec = list()
     141        for line in dat_file:
     142            if line.startswith("title="):
    148143                line = line.strip()
    149144                try:
    150145                    quoted_rawfn = re.search(self.rawfn_regex, line).group(1)
     146                    rawfn = urllib.unquote(quoted_rawfn)
    151147                except AttributeError:
    152                     sys.exit("It seems there is no raw: field in TITLE")
    153                 rawfn = urllib.unquote(quoted_rawfn)
    154                 current_id_mgf["RawFile"] = rawfn
    155                 if self.scans_in_title:
    156                     try:
    157                         scan = int(re.search(self.scan_regex, line).group(1))
    158                     except AttributeError:
    159                         sys.exit("Default scan regex for TITLE field"
    160                                  " doesn't match anything")
    161                     current_id_mgf['Scan'] = scan
    162             elif line.startswith("charge="):
    163                 charge = line.strip().split('=')[1]
    164                 current_id_mgf['Charge'] = charge
    165             elif not self.scans_in_title and line.startswith("scans="):
    166                 scan = int(line.strip().split('=')[2])
    167                 current_id_mgf['Scan'] = scan
    168         return nativeid_spectra
     148                    sys.exit("It seems the raw regex is not valid")
     149                try:
     150                    scan = int(re.search(self.scan_regex, line).group(1))
     151                except AttributeError:
     152                    sys.exit("Default scan regex for TITLE field"
     153                             " doesn't match anything")
     154                extraspec.append((rawfn, scan))
     155        return extraspec
    169156
    170157    def parse(self, dat_file):
     
    175162        summspec = self._parse_summary(dat_file)
    176163        nativeid_psms = self._parse_psms(dat_file)
    177         extraspec = self._parse_spectra(dat_file)
    178         for native_id, psms in nativeid_psms.items():
    179             for psm in psms:
    180                 psm['#SpectraFile'] = pkl_fn
    181                 psm['FragMode'] = frag_mode
    182                 psm.update(nativeid_spectra[native_id])
    183                 yield psm
     164        extraspec = self._parse_extraspec(dat_file)
     165        #for native_id, psms in nativeid_psms.items():
     166        #    for psm in psms:
     167        #        psm['#SpectraFile'] = pkl_fn
     168        #        psm['FragMode'] = frag_mode
     169        #        psm.update(nativeid_spectra[native_id])
     170        #        yield psm
     171        pass
    184172
    185173def get_prot_ids(accs):
Note: See TracChangeset for help on using the changeset viewer.