Changeset 36
- Timestamp:
- Aug 11, 2010, 4:07:46 PM (11 years ago)
- bzr:base-revision:
- j@dannynavarro.net-20100809093614-khwr3ax29z5bvx1p
- bzr:committer:
- Danny Navarro <j@dannynavarro.net>
- bzr:file-ids:
- mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
- bzr:mapping-version:
- v4
- bzr:repository-uuid:
- 724254b2-fbe6-419d-9466-c04ef4c9d29d
- bzr:revision-id:
- j@dannynavarro.net-20100809114648-ieq7nmfd0ljlvleh
- bzr:revno:
- 36
- bzr:revprop:branch-nick:
- trunk
- bzr:root:
- trunk
- bzr:timestamp:
- 2010-08-09 13:46:48.609999895 +0200
- bzr:user-agent:
- bzr2.1.2+bzr-svn1.0.3
- svn:original-date:
- 2010-08-09T11:46:48.610000Z
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/mzcms/parsers.py
r35  r36
 10   10    from string import Template
 11   11    from collections import defaultdict
      12  + from collections import namedtuple
 12   13
 13   14    SEPARATOR = '--gc0p4Jq0M2Yt08jU534c0p'
  …    …
 15   16    r'Content-Type: application/x-Mascot; name="$section"')
 16   17
 17       - PEP_REGEX = re.compile(r'^q(\d+)' # nativeid
 18       - '_p \d+' # rank
 19       - '=\d+?' # missed cleavages
 20       - ', [\.\d]+?' # peptide mass
 21       - ', [-\.\d]+?' # delta mass
 22       - ',\d+?' # n ions matches
 23       - ',(\w+?)' # peptide string
 24       - ',\d+?' # peaks used for Ions1
 25       - ',(\d+?)' # modstring
 26       - ', [\.\d]+?' # score
 27       - ',\d+?' # ion series found
 28       - ',\d+?' # peaks used for Ions2
 29       - ',\d+?' # peaks used for Ions3
 30       - ';(.+)$' # protein accessions string
      18  + PEP_REGEX = re.compile(r'^q(\d+)' # nativeid
      19  + '_p(\d+)' # rank
      20  + '=\d+?' # missed cleavages
      21  + ',([\.\d]+?)' # peptide mass
      22  + ',([-\.\d]+?)' # delta mass
      23  + ',\d+?' # n ions matches
      24  + ',(\w+?)' # peptide string
      25  + ',\d+?' # peaks used for Ions1
      26  + ',(\d+?)' # modstring
      27  + ',([\.\d]+?)' # score
      28  + ',\d+?' # ion series found
      29  + ',\d+?' # peaks used for Ions2
      30  + ',\d+?' # peaks used for Ions3
      31  + ';(.+)$' # protein accessions string
 31   32    )
 32   33    #fraction_regex = re.compile(r'fraction%3a%20(\d+)')
  …    …
 83   84    import ipdb; ipdb.set_trace()
 84   85    nativeid = int(match.group(1))
 85       - peptide_str = match.group(2)
 86       - mods_str = match.group(3)
 87       - accs = match.group(4)
 88       - peptide = apply_mods(peptide_str, mods_str)
 89       - is_decoy = check_decoy(accs, self.decoy_regex)
 90       - # Assuming that after a peptide line, always the next line is
 91       - # the line with flanking Aa
 92       - # q4_p2_terms=E,K:E,K:E,K:E,K:E,K
 93       - line = dat_file.next().strip()
 94       - # (('E', 'K'), ('E', 'K'), ('E', 'K'))
 95       - flanking_pairs = set(tuple(x.split(','))
 96       - for x in line.split("=")[1].split(":"))
 97       - for flanking_pair in flanking_pairs:
 98       - annotation = '.'.join((
 99       - flanking_pair[0], peptide, flanking_pair[1]))
100       - annotation = annotation.replace('-', '*')
101       - nativeid_psms[nativeid].append(dict(
102       - Annotation=annotation, IsDecoy=is_decoy))
      86  + rank = int(match.group(2))
      87  + pep_mass = float(match.group(3))
      88  + delta_mass = float(match.group(4))
      89  + pep_seq = match.group(5)
      90  + mods_str = match.group(6)
      91  + score = match.group(7)
      92  + accs = match.group(8)
      93  + prot_ids = get_prot_ids(accs)
      94  + is_target = check_target(accs, self.decoy_regex)
      95  + if is_target and rank == 1 or rank == 2:
      96  + mascot_psm = MascotPsm('mascot_psm',
      97  + rank=rank,
      98  + pep_mass=pep_mass,
      99  + pep_seq=pep_seq,
     100  + prot_ids=prot_ids
     101  + )
     102  + nativeid_psms[nativeid].append(mascot_psm)
103  103    elif line.strip() == SEPARATOR:
104  104    return nativeid_psms
Note: See TracChangeset for help on using the changeset viewer.