Changeset 61
- Timestamp:
- Aug 11, 2010, 4:14:54 PM (11 years ago)
- bzr:base-revision:
- j@dannynavarro.net-20100811130247-m5vcvd0g77y9h9ko
- bzr:committer:
- Danny Navarro <j@dannynavarro.net>
- bzr:file-ids:
- mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
- bzr:mapping-version:
- v4
- bzr:repository-uuid:
- 724254b2-fbe6-419d-9466-c04ef4c9d29d
- bzr:revision-id:
- j@dannynavarro.net-20100811131617-5n3z2dsngary27hz
- bzr:revno:
- 61
- bzr:revprop:branch-nick:
- trunk
- bzr:root:
- trunk
- bzr:timestamp:
- 2010-08-11 15:16:17.500000000 +0200
- bzr:user-agent:
- bzr2.1.2+bzr-svn1.0.3
- svn:original-date:
- 2010-08-11T13:16:17.500000Z
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/mzcms/parsers.py
r59  r61
264  264          writer.writerow(psm)
265  265
     266  def parse_fasta(fasta_path, proteins):
     267      # XXX: Improve parser, don't seek back
     268      """Update protein sequence container from fasta file"""
     269      fasta_file = open(fasta_path, 'rb')
     270      while True:
     271          line = fasta_file.readline().decode('utf-8').strip()
     272          if line.startswith('>IPI:IPI'):
     273              seq_lines = list()
     274              prot_id = line.split('|')[0][5:]
     275              while True:
     276                  try:
     277                      line = fasta_file.readline().decode('utf-8').strip()
     278                  except StopIteration:
     279                      return
     280                  if not line.startswith('>'):
     281                      seq_lines.append(line)
     282                      last_line = fasta_file.tell()
     283                  else:
     284                      sequence = ''.join(seq_lines)
     285                      if prot_id in proteins:
     286                          #proteins[prot_id].sequence == 'TBI':
     287                          proteins[prot_id].sequence = sequence
     288                      fasta_file.seek(last_line)
     289                      break
     290
266  291  # XXX: Use better defaults for containers and factories
267  292  def parse_dats(dats_dir, proteins_container=dict, peptides_container=dict,
…    …
281  306      import transaction
282  307      for root, dirs, files in os.walk(dats_dir):
283               for dat_fn in files:
284                   if fnmatch.fnmatch(dat_fn, '*.dat'):
285                       with open(os.path.join(root, dat_fn)) as dat_file:
     308          for fn in files:
     309              if fnmatch.fnmatch(fn, '*.dat'):
     310                  with open(os.path.join(root, fn)) as dat_file:
286  311                      ser_proteins, ser_peptides, \
287  312                          ser_spectra, ser_psms = dat_parser.parse(dat_file)
…    …
300  325                          psms[str(psm_id)] = psm
301  326                      transaction.savepoint()
     327              elif fnmatch.fnmatch(fn, '*.fasta'):
     328                  fasta_path = os.path.join(root, fn)
     329                  # XXX: Improve this function, has to return something
     330                  try:
     331                      parse_fasta(fasta_path, proteins)
     332                  except UnboundLocalError:
     333                      print("You must have the fasta database in the data to parse "
     334                            "directory")
     335
302  336
303  337  def main():
Note: See TracChangeset for help on using the changeset viewer.