Changeset 43
- Timestamp:
- Aug 11, 2010, 4:09:41 PM (11 years ago)
- bzr:base-revision:
- j@dannynavarro.net-20100809132407-1fe2wb5blanid09e
- bzr:committer:
- Danny Navarro <j@dannynavarro.net>
- bzr:file-ids:
- mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
- bzr:mapping-version:
- v4
- bzr:repository-uuid:
- 724254b2-fbe6-419d-9466-c04ef4c9d29d
- bzr:revision-id:
- j@dannynavarro.net-20100809140009-s4lr753fe9u4cqvu
- bzr:revno:
- 43
- bzr:revprop:branch-nick:
- trunk
- bzr:root:
- trunk
- bzr:timestamp:
- 2010-08-09 16:00:09.927000046 +0200
- bzr:user-agent:
- bzr2.1.2+bzr-svn1.0.3
- svn:original-date:
- 2010-08-09T14:00:09.927000Z
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/mzcms/parsers.py
r42 r43 46 46 """ 47 47 def __init__(self, 48 scan_str=r'FinneganScanNumber%3a%20(\d+) ',48 scan_str=r'FinneganScanNumber%3a%20(\d+)%20', 49 49 rawfn_str=r'RawFile%3a%20(.+raw)', 50 50 decoy_str=r'^IPI:REV_:IPI', 51 51 contaminant_str=r'^IPI:CON_:IPI' 52 52 ): 53 s can_regex = re.compile(scan_str)53 self.scan_regex = re.compile(scan_str) 54 54 decoy_regex = re.compile(decoy_str) 55 contaminant_regex = re.compile(contaminant_str) 55 56 # Contaminants are not exactly decoy psms 56 self.non_target_regexes = ( scan_regex, decoy_regex)57 self.non_target_regexes = (contaminant_regex, decoy_regex) 57 58 self.rawfn_regex = re.compile(rawfn_str) 58 self.contaminant_regex = re.compile(contaminant_str)59 59 60 60 def _parse_spectra_fn(self, dat_file): … … 95 95 charge = int(r_charge.rstrip('+')) 96 96 summspec.append((mz, charge)) 97 return summspec 97 elif line.strip() == SEPARATOR: 98 return summspec 98 99 99 100 def _parse_psms(self, dat_file): … … 133 134 return nativeid_psms 134 135 135 def _parse_ spectra(self, dat_file):136 def _parse_extraspec(self, dat_file): 136 137 """Gets the data coming from the input file from Mascot dat 137 138 format 138 139 """ 139 nativeid_spectra = defaultdict(dict) 140 nativeid = 1 141 for line in dat_file: 142 nativeid_str = ''.join(("query", str(nativeid))) 143 query_line = SECTION_TEMPLATE.substitute(section=nativeid_str) 144 if line.strip() == query_line: 145 current_id_mgf = nativeid_spectra[nativeid] 146 nativeid += 1 147 elif line.startswith("title="): 140 extraspec = list() 141 for line in dat_file: 142 if line.startswith("title="): 148 143 line = line.strip() 149 144 try: 150 145 quoted_rawfn = re.search(self.rawfn_regex, line).group(1) 146 rawfn = urllib.unquote(quoted_rawfn) 151 147 except AttributeError: 152 sys.exit("It seems there is no raw: field in TITLE") 153 rawfn = urllib.unquote(quoted_rawfn) 154 current_id_mgf["RawFile"] = rawfn 155 if self.scans_in_title: 156 try: 157 scan = int(re.search(self.scan_regex, 
line).group(1)) 158 except AttributeError: 159 sys.exit("Default scan regex for TITLE field" 160 " doesn't match anything") 161 current_id_mgf['Scan'] = scan 162 elif line.startswith("charge="): 163 charge = line.strip().split('=')[1] 164 current_id_mgf['Charge'] = charge 165 elif not self.scans_in_title and line.startswith("scans="): 166 scan = int(line.strip().split('=')[2]) 167 current_id_mgf['Scan'] = scan 168 return nativeid_spectra 148 sys.exit("It seems the raw regex is not valid") 149 try: 150 scan = int(re.search(self.scan_regex, line).group(1)) 151 except AttributeError: 152 sys.exit("Default scan regex for TITLE field" 153 " doesn't match anything") 154 extraspec.append((rawfn, scan)) 155 return extraspec 169 156 170 157 def parse(self, dat_file): … … 175 162 summspec = self._parse_summary(dat_file) 176 163 nativeid_psms = self._parse_psms(dat_file) 177 extraspec = self._parse_spectra(dat_file) 178 for native_id, psms in nativeid_psms.items(): 179 for psm in psms: 180 psm['#SpectraFile'] = pkl_fn 181 psm['FragMode'] = frag_mode 182 psm.update(nativeid_spectra[native_id]) 183 yield psm 164 extraspec = self._parse_extraspec(dat_file) 165 #for native_id, psms in nativeid_psms.items(): 166 # for psm in psms: 167 # psm['#SpectraFile'] = pkl_fn 168 # psm['FragMode'] = frag_mode 169 # psm.update(nativeid_spectra[native_id]) 170 # yield psm 171 pass 184 172 185 173 def get_prot_ids(accs):
Note: See TracChangeset for help on using the changeset viewer.