source: trunk/brs2010p35/src/jag/permutation.py @ 2

Last change on this file since 2 was 2, checked in by tim.te.beek@…, 8 years ago

Add revision 338 snapshot of previous SVN instance, omitting PLINK binaries

File size: 6.9 KB
Line 
1'''
2Created on Dec 15, 2010
3
4@author: maartenk, esther lips
5
6Performs permutation over results from AssociationAnalysis (or creates it when not found)
7permutation are only done for pheno types where sumlog.P{number}.log is found.
8'''
9
10from association_analysis import AssociationAnalysis
11import jag.common as common
12import jag.permutationresults as permutationresults
13import random
14import sys
15import logging as log
16
17
18
19class Permutation(object):
20    '''
21    handle the permutations on a group file
22    '''
23
24    def __init__(self, plink, group, in_out, no_emp=False):
25        self.plink = plink
26        self.group = group
27        self.files = {}
28        self.in_out = in_out
29        self.no_emp = no_emp
30        self.gene_based = False
31
32
33    def get_assoc_analysis(self):
34        """
35        get association analysis file
36        """
37
38        if not self.no_emp:
39            assoc_analysis = AssociationAnalysis(self.in_out)
40            aa_results = assoc_analysis.get_aa_results(self.plink, self.group, self.gene_based)
41
42            self.plink.phenotypes_present = _get_aa_phenotypes_present(aa_results)
43        else:
44            #create a list with number of phenotypes
45            aa_results = self.plink.get_number_of_pheno_types() * [False]
46            self.plink.phenotypes_present = range(1, len(aa_results) + 1)
47        return aa_results
48   
49
50    def localpermutation(self, nperm, random_seed, gene_group=False):
51        '''
52        Run permutation on local computer
53        '''
54       
55        self.gene_based = gene_group
56
57        aa_results = self.get_assoc_analysis()
58        #create a random seed
59        if (random_seed == None):
60            random_seed = _create_random_seed(8)
61
62        snp_to_group = common.map_snp_to_group(self.group, gene_group)
63
64        self.plink.set_snp_to_group_map(snp_to_group)
65
66        nperm = int(nperm)
67       
68        #set number of permutations per cpu
69        permperjob = int(50)
70        # When a covar file is set , multiple phenotypes can not be run at once
71        #because the covar is coupled by phenotype.this is a hack to prevent
72        #multiple  phenotypes are ran at once
73        if self.plink.covar_file:
74            permperjob = 1
75
76        jobs = _create_seeds_for_jobs(nperm, permperjob, random_seed)
77        #print("nperm=", str(nperm))
78       
79        # hoeveel permutaties zitten in resultfiles?
80        #print("jobs", str(jobs))
81        resultfiles = self.plink.run_permutation_single_core(jobs)
82                               
83        results_merged = merge_results(resultfiles)
84               
85        for key, perm_score_per_group in results_merged.iteritems():
86            perm_out_string = perm_score_per_group.format_permout()
87            #print(perm_out_string)
88            perm_out_filename = "perm." + str(random_seed) + ".P" + str(key) + ".out"
89            #save permutation score as table with random seed in filename
90            perm_filename = self.in_out.save_text_to_filename(perm_out_filename, perm_out_string)
91            log.info("\nSaved permutation results as " + perm_filename)
92            #make sure key is a string for concatenating
93            key = str(key)
94
95            self.files[key] = {"perm":perm_filename}
96           
97            if (self.no_emp == False):
98                column_aaresults_mapping = {}
99               
100                for i in range(len(aa_results)):
101                    column_aaresults_mapping[str(aa_results[i].column_number)] = i
102                   
103                current_aa_results = aa_results[column_aaresults_mapping[key]]
104                empp_out_text = perm_score_per_group.format_permutated_results(current_aa_results)
105
106                emp_filename = "empp.P" + key + ".out"
107                emp_filename = self.in_out.save_text_to_filename(emp_filename, empp_out_text)
108                log.info("Saved empirical pvalues as " + emp_filename)
109                #save created files
110                self.files[str(key)] ["empp"] = emp_filename
111
112
113def select_aa_on_pheno(aa_restult_list, pheno_number):
114    """
115    return from a list of aa_results a aa_result with column number "pheno_number"
116    """
117    for aa_result in aa_restult_list:
118        if(int(aa_result.column_number) == int(pheno_number)):
119            return(aa_result)
120    sys.exit("selection of phenotype failed")
121   
122
123def merge_list_of_dict_of_list(resultfiles):
124    """
125    Merge a list of dicts which contains list2, append list2 on bases of dicts keys
126    """
127    perm_score_per_group = {}
128    for key in resultfiles[0].keys():
129        perm_score_per_group[key] = []
130
131    for res_dict in resultfiles:
132        for key, value in res_dict.iteritems():
133            perm_score_per_group[key] = perm_score_per_group[key] + value
134
135    return perm_score_per_group
136
137
138def merge_results(resultfiles):
139    """
140    Merge a list of dicts which contains list2, append list2 on bases of dicts keys
141    """
142   
143    perm_per_pheno = {}
144    #loop if it is run on multiple cpu
145    for result in resultfiles:
146        for pheno_nr in result:
147            #print pheno_nr
148            fg_cluster = result[pheno_nr]
149           
150            if not perm_per_pheno.has_key(pheno_nr):
151                perm_per_pheno[pheno_nr] = permutationresults.PermutatedResults()
152               
153            for groupname in fg_cluster.permutated_scores.keys():
154               
155                scores = fg_cluster.permutated_scores[groupname]
156                #print("Scores:", str(scores))
157                perm_per_pheno[pheno_nr].add_results(groupname, scores)
158   
159            perm_per_pheno[pheno_nr].add_seeds(fg_cluster.seeds)
160
161    return perm_per_pheno
162
163
164def _create_seeds_for_jobs(nperm, permpercpu, user_set_random_seed):
165    """
166    create seeds for each permutation job
167    """
168    random.seed(user_set_random_seed)
169    seeds = nperm * [0]
170    for i in range(len(seeds)):
171        seeds[i] = random.random()
172    amountofruns = int(nperm / permpercpu)
173
174    n_perm_non_fair_dist = int(nperm % permpercpu)
175    #create first job including non fair permutations
176
177    if (amountofruns == 0):
178        amountofruns = 1
179    jobs = [0] * amountofruns
180    start_of_slice = permpercpu + n_perm_non_fair_dist
181    jobs[0] = seeds[:start_of_slice]
182    for run in range(1, amountofruns):
183        jobs[run] = seeds[start_of_slice:start_of_slice + permpercpu]
184        start_of_slice = start_of_slice + permpercpu
185
186    return jobs
187
188
189def _create_random_seed(length):
190    """
191    creates a random seed that is a valid filename.
192    The length of the random seed can be given as int
193    """
194    randomized = random.SystemRandom()
195
196    digits = "".join([str(x) for x in range(0, 10)])
197    small = "".join([chr(x) for x in range(97, 123)])
198    big = "".join([chr(x) for x in range(65, 91)])
199    others = "-_"
200
201    all_characters = digits + small + big + others
202    return "".join([randomized.choice(all_characters) for x in range(length)])
203
204
205def _get_aa_phenotypes_present(aa_results):
206    phenotypes_present = sorted([int(res.column_number) for res in aa_results])
207    return phenotypes_present
Note: See TracBrowser for help on using the repository browser.