Changeset 30


Ignore:
Timestamp:
Aug 11, 2010, 4:06:01 PM (10 years ago)
Author:
j@…
bzr:base-revision:
j@dannynavarro.net-20100806131029-8dov1nqec1ao917u
bzr:committer:
Danny Navarro <j@dannynavarro.net>
bzr:file-ids:

mzcms/models.py models.py-20100730084238-fjjwldiefr0w07zv-4
mzcms/parsers.py parsers.py-20100806092910-g1sxvv1o5b9umkof-1
bzr:mapping-version:
v4
bzr:repository-uuid:
724254b2-fbe6-419d-9466-c04ef4c9d29d
bzr:revision-id:
j@dannynavarro.net-20100806140012-pvqfaqhcmlz08i2f
bzr:revno:
30
bzr:revprop:branch-nick:
trunk
bzr:root:
trunk
bzr:timestamp:
2010-08-06 16:00:12.605999947 +0200
bzr:user-agent:
bzr2.1.2+bzr-svn1.0.3
svn:original-date:
2010-08-06T14:00:12.606000Z
Message:

Fixed file walker, various renames

Location:
trunk/mzcms
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/mzcms/models.py

    r25 r30  
    1616class Proteins(Folder):
    1717    """Proteins folder factory"""
    18     def __init__(self, proteins):
     18    def __init__(self):
    1919        self.name = 'proteins'
    2020
  • trunk/mzcms/parsers.py

    r29 r30  
    4646        self.contaminant_regex = re.compile(contaminant_str)
    4747
    48     def _parse_spectra_fn(self, datfile):
     48    def _parse_spectra_fn(self, dat_file):
    4949        """Parses the spectra file name used for Mascot search
    5050        """
    51         for line in datfile:
     51        for line in dat_file:
    5252            if line.startswith("FILE="):
    5353                full_path = line.strip().split('=')[1]
     
    5555                return norm_path.split("/")[-1]
    5656
    57     def _parse_frag_mode(self, datfile):
     57    def _parse_frag_mode(self, dat_file):
    5858        """Get the fragmentation type by looking at dat parameters
    5959        """
    6060        # TODO: Handle CID and error for other instruments
    61         for line in datfile:
     61        for line in dat_file:
    6262            if line.startswith("INSTRUMENT="):
    6363                frag_line = line.strip().split('=')[1]
     
    6969                    return frag_line
    7070
    71     def _parse_annotations(self, datfile):
     71    def _parse_psms(self, dat_file):
    7272        """Parses the peptide section of mascot
    7373        """
    74         nativeid_annotations = defaultdict(list)
     74        nativeid_psms = defaultdict(list)
    7575        # Seek until Mascot peptide section
    7676        peptide_section = SECTION_TEMPLATE.substitute(section="peptides")
    77         for line in datfile:
     77        for line in dat_file:
    7878            if line.strip() == peptide_section:
    7979                break
    80         for line in datfile:
     80        for line in dat_file:
     81            import ipdb; ipdb.set_trace()
    8182            match = re.match(PEP_REGEX, line)
    8283            if match:
     
    9091                # the line with flanking Aa
    9192                # q4_p2_terms=E,K:E,K:E,K:E,K:E,K
    92                 line = datfile.next().strip()
     93                line = dat_file.next().strip()
    9394                # (('E', 'K'), ('E', 'K'), ('E', 'K'))
    9495                flanking_pairs = set(tuple(x.split(','))
     
    9899                            flanking_pair[0], peptide, flanking_pair[1]))
    99100                    annotation = annotation.replace('-', '*')
    100                     nativeid_annotations[nativeid].append(dict(
     101                    nativeid_psms[nativeid].append(dict(
    101102                            Annotation=annotation, IsDecoy=is_decoy))
    102103            elif line.strip() == SEPARATOR:
    103                 return nativeid_annotations
     104                return nativeid_psms
    104105
    105106    def _parse_spectra(self, datfile):
     
    141142        return nativeid_spectra
    142143
    143     def parse(self, datfile):
     144    def parse(self, dat_file):
    144145        """Takes a dat file and returns a dictionary of psms.
    145146        """
    146         pkl_fn = self._parse_spectra_fn(datfile)
    147         frag_mode = self._parse_frag_mode(datfile)
    148         nativeid_annotations = self._parse_annotations(datfile)
     147        pkl_fn = self._parse_spectra_fn(dat_file)
     148        frag_mode = self._parse_frag_mode(dat_file)
     149        nativeid_psms = self._parse_psms(dat_file)
    149150        nativeid_spectra = self._parse_spectra(datfile)
    150151        for native_id, annotations in nativeid_annotations.items():
     
    196197    """Parses all the dat files in dats_dir
    197198    """
    198     dat_paths = (f for f in os.walk(dats_dir))
    199199    proteins = proteins_factory()
    200200    peptides = peptides_factory()
     
    202202    psms = psms_factory()
    203203    dat_parser = DatParser()
    204     for dat_path in dat_paths:
    205         with open(dat_path) as dat_file:
    206             dat_proteins, dat_peptides, \
    207             dat_spectra, dat_psms = dat_parser.parse(dat_file)
    208             proteins.update(dat_proteins)
    209             peptides.update(dat_peptides)
    210             spectra.update(dat_spectra)
    211             psms.update(dat_psms)
     204    for root, dirs, files in os.walk(dats_dir):
     205        for dat_fn in files:
     206            # TODO: exception for not dat files
     207            with open(os.path.join(root, dat_fn)) as dat_file:
     208                dat_proteins, dat_peptides, \
     209                dat_spectra, dat_psms = dat_parser.parse(dat_file)
     210                proteins.update(dat_proteins)
     211                peptides.update(dat_peptides)
     212                spectra.update(dat_spectra)
     213                psms.update(dat_psms)
    212214    return proteins, peptides, spectra, psms
    213215
Note: See TracChangeset for help on using the changeset viewer.