Changeset 30
- Timestamp:
- Aug 11, 2010, 4:06:01 PM (11 years ago)
- bzr:base-revision:
- j@dannynavarro.net-20100806131029-8dov1nqec1ao917u
- bzr:committer:
- Danny Navarro <j@dannynavarro.net>
- bzr:file-ids:
mzcms/models.py models.py-20100730084238-fjjwldiefr0w07zv-4
mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
- bzr:mapping-version:
- v4
- bzr:repository-uuid:
- 724254b2-fbe6-419d-9466-c04ef4c9d29d
- bzr:revision-id:
- j@dannynavarro.net-20100806140012-pvqfaqhcmlz08i2f
- bzr:revno:
- 30
- bzr:revprop:branch-nick:
- trunk
- bzr:root:
- trunk
- bzr:timestamp:
- 2010-08-06 16:00:12.605999947 +0200
- bzr:user-agent:
- bzr2.1.2+bzr-svn1.0.3
- svn:original-date:
- 2010-08-06T14:00:12.606000Z
- Location:
- trunk/mzcms
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/mzcms/models.py
r25 r30
 16  16   class Proteins(Folder):
 17  17   """Proteins folder factory"""
 18     - def __init__(self , proteins):
     18 + def __init__(self):
 19  19   self.name = 'proteins'
 20  20
-
trunk/mzcms/parsers.py
r29 r30
 46  46   self.contaminant_regex = re.compile(contaminant_str)
 47  47
 48     - def _parse_spectra_fn(self, dat file):
     48 + def _parse_spectra_fn(self, dat_file):
 49  49   """Parses the spectra file name used for Mascot search
 50  50   """
 51     - for line in dat file:
     51 + for line in dat_file:
 52  52   if line.startswith("FILE="):
 53  53   full_path = line.strip().split('=')[1]
  …   …
 55  55   return norm_path.split("/")[-1]
 56  56
 57     - def _parse_frag_mode(self, dat file):
     57 + def _parse_frag_mode(self, dat_file):
 58  58   """Get the fragmentation type by looking at dat parameters
 59  59   """
 60  60   # TODO: Handle CID and error for other instruments
 61     - for line in dat file:
     61 + for line in dat_file:
 62  62   if line.startswith("INSTRUMENT="):
 63  63   frag_line = line.strip().split('=')[1]
  …   …
 69  69   return frag_line
 70  70
 71     - def _parse_ annotations(self, datfile):
     71 + def _parse_psms(self, dat_file):
 72  72   """Parses the peptide section of mascot
 73  73   """
 74     - nativeid_ annotations = defaultdict(list)
     74 + nativeid_psms = defaultdict(list)
 75  75   # Seek until Mascot peptide section
 76  76   peptide_section = SECTION_TEMPLATE.substitute(section="peptides")
 77     - for line in dat file:
     77 + for line in dat_file:
 78  78   if line.strip() == peptide_section:
 79  79   break
 80     - for line in datfile:
     80 + for line in dat_file:
     81 + import ipdb; ipdb.set_trace()
 81  82   match = re.match(PEP_REGEX, line)
 82  83   if match:
  …   …
 90  91   # the line with flanking Aa
 91  92   # q4_p2_terms=E,K:E,K:E,K:E,K:E,K
 92     - line = dat file.next().strip()
     93 + line = dat_file.next().strip()
 93  94   # (('E', 'K'), ('E', 'K'), ('E', 'K'))
 94  95   flanking_pairs = set(tuple(x.split(','))
  …   …
 98  99   flanking_pair[0], peptide, flanking_pair[1]))
 99 100   annotation = annotation.replace('-', '*')
100     - nativeid_ annotations[nativeid].append(dict(
    101 + nativeid_psms[nativeid].append(dict(
101 102   Annotation=annotation, IsDecoy=is_decoy))
102 103   elif line.strip() == SEPARATOR:
103     - return nativeid_ annotations
    104 + return nativeid_psms
104 105
105 106   def _parse_spectra(self, datfile):
  …   …
141 142   return nativeid_spectra
142 143
143     - def parse(self, dat file):
    144 + def parse(self, dat_file):
144 145   """Takes a dat file and returns a dictionary of psms.
145 146   """
146     - pkl_fn = self._parse_spectra_fn(dat file)
147     - frag_mode = self._parse_frag_mode(dat file)
148     - nativeid_ annotations = self._parse_annotations(datfile)
    147 + pkl_fn = self._parse_spectra_fn(dat_file)
    148 + frag_mode = self._parse_frag_mode(dat_file)
    149 + nativeid_psms = self._parse_psms(dat_file)
149 150   nativeid_spectra = self._parse_spectra(datfile)
150 151   for native_id, annotations in nativeid_annotations.items():
  …   …
196 197   """Parses all the dat files in dats_dir
197 198   """
198     - dat_paths = (f for f in os.walk(dats_dir))
199 199   proteins = proteins_factory()
200 200   peptides = peptides_factory()
  …   …
202 202   psms = psms_factory()
203 203   dat_parser = DatParser()
204     - for dat_path in dat_paths:
205     - with open(dat_path) as dat_file:
206     - dat_proteins, dat_peptides, \
207     - dat_spectra, dat_psms = dat_parser.parse(dat_file)
208     - proteins.update(dat_proteins)
209     - peptides.update(dat_peptides)
210     - spectra.update(dat_spectra)
211     - psms.update(dat_psms)
    204 + for root, dirs, files in os.walk(dats_dir):
    205 + for dat_fn in files:
    206 + # TODO: exception for not dat files
    207 + with open(os.path.join(root, dat_fn)) as dat_file:
    208 + dat_proteins, dat_peptides, \
    209 + dat_spectra, dat_psms = dat_parser.parse(dat_file)
    210 + proteins.update(dat_proteins)
    211 + peptides.update(dat_peptides)
    212 + spectra.update(dat_spectra)
    213 + psms.update(dat_psms)
212 214   return proteins, peptides, spectra, psms
213 215
Note: See TracChangeset
for help on using the changeset viewer.