source: trunk/brs2010p35/src/jag/permutationresults.py @ 2

Last change on this file since 2 was 2, checked in by tim.te.beek@…, 8 years ago

Add revision 338 snapshot of previous SVN instance, omitting PLINK binaries

File size: 6.8 KB
Line 
1'''
2Created on Dec 14, 2010
3
4@author: maartenk, esther lips
5'''
6
7import jag.common as common
8import sys
9
10
11class PermutatedResults:
12    """
13    Handles the results of all permutations
14    """
15    def __init__(self):
16        self.raw_p_value = {}
17        self.permutated_scores = {}
18        self.seeds = []
19       
20
21    def add_seeds(self, seeds):
22        """add seeds to permuted results to make reproducing possible"""
23        if not isinstance(seeds, list):
24            raise AssertionError
25        self.seeds.extend(seeds)
26
27
28    def add_results(self, groupname, scores):
29        """
30        add score to list of permuted scores . Values are stored per group name
31        """
32        if not self.permutated_scores.has_key(groupname):
33            self.permutated_scores[groupname] = []
34        #add array
35        self.permutated_scores[groupname].extend(scores)
36
37
38    def add_snp_to_cluster(self, p_value, gene_and_cluster_list):
39        """
40        add a P value found in PLINK output to the cluster in gene_and_cluster_list
41        """
42        for cluster in gene_and_cluster_list:
43            if  not cluster["c"] in self.raw_p_value:
44                self.raw_p_value[cluster["c"]] = []
45
46            self.raw_p_value[cluster["c"]].append(float(p_value))
47
48
49    def process_permutation(self):
50        """
51        calculate the negsumlog10 values from the raw_p_values and add to right cluster in permutated_scores
52        remove raw_p_values
53        """
54        for cluster in self.raw_p_value.iterkeys():
55           
56            if not cluster in self.permutated_scores:
57                self.permutated_scores[cluster] = []
58               
59            nsumlog10 = common.negsumlog10(self.raw_p_value[cluster])
60            self.permutated_scores[cluster].append(nsumlog10)
61
62        #clean the raw p values for next permutation
63        self.raw_p_value = {}
64        return self.permutated_scores
65
66
67    def read_permout(self, files):
68        """
69        read multiple permout files and make one permutationresults file
70        """
71        for permfile in files:
72            file_handle = common.getfile_handle(permfile)
73            text_as_list = file_handle.readlines()#pylint: disable=E1103
74            header = text_as_list[0].strip().split("\t")
75            #check header last column is seed
76            if(not header[len(header) - 1] == "seed"):
77                sys.exit("last column of " + permfile + " does not have the \"seed\"")
78            #create empty list of list to store columns
79            results = [  [] for i in range(len(header))]
80
81            for line in range(1, len(text_as_list)):
82                splitted_line = text_as_list[line].strip().split("\t")
83                for i in range(len(splitted_line)):
84                    results[i].append(float(splitted_line[i]))
85            #put the results list of list in permutation results
86            for i in range(len(header) - 1):
87                self.add_results(header[i], results[i])
88            self.add_seeds(results[len(header) - 1])
89
90
91    def format_permout(self):
92        """format the permutations score in a table like manner with as column headers
93        groupname.
94        """
95
96        groupnames = self.permutated_scores.keys()
97        groupnames.sort()
98        length_permutations = [len(self.permutated_scores[g]) for g in groupnames]
99       
100        diffrence_in_length = sum([abs(l - length_permutations[0])for l in length_permutations])
101        formated_results = '\t'.join(groupnames) + "\tseed\n"
102       
103        if(diffrence_in_length == 0):
104            for i in range(length_permutations[0]):
105                formated_results += '\t'.join([str(self.permutated_scores[p][i])for p in groupnames])
106                formated_results += "\t" + str(self.seeds[i]) + "\n"
107        else:
108            sys.exit("Number of groups are not equal over all permuted files.")
109        #check length of each key is equal
110        return(formated_results)
111
112
113    def format_permutated_results(self, aa_result):
114        """
115        return nice formated results of permutation with statistics like variance and ngenes etc
116        """
117        message = "Group\tsumlogReal\tnperm\temp_p\tnSNP\tnGenes"
118        message += "\tvar(Perms)\tmean(Perms)\tnEff\n"
119        for groupname in sorted(self.permutated_scores.iterkeys()):
120            sum_neg_log = aa_result.get_sum_neglog_10_score(groupname)
121            n_snp = aa_result.get_n_snps(groupname)
122            group_score = self.permutated_scores[groupname]
123            message += groupname + "\t"
124            message += str(sum_neg_log) + "\t"
125            message += str(len(group_score)) + "\t"
126            message += str(calculate_emperical_p_value(sum_neg_log, group_score)) + "\t"
127            message += str(n_snp) + "\t"
128            message += str(aa_result.get_n_genes_of_group(groupname)) + "\t"
129            message += str(common.variance(group_score)) + "\t"
130            message += str(common.average(group_score)) + "\t"
131            message += str(calculate_effective_snps(n_snp, group_score)) + "\n"
132       
133        return message
134
135
136
137def calculate_emperical_p_value(score, permutationscores):
138    """
139    calculate empirical pvalue - self contained test
140    """
141
142    score = float(score)
143    permutationscores = [float(x) for x in  permutationscores]
144    amount_perm_bigger_score = len([x for x in  permutationscores if x > score])
145   
146    return (float(amount_perm_bigger_score) / float(len(permutationscores)))
147
148
149
150def calc_emp_emp(score, permutationscores):
151    """
152    calculate empirical pvalue - competetive test
153    """
154
155    score = float(score)
156    permutationscores = [float(x) for x in  permutationscores]
157    amount_perm_bigger_score = len([x for x in  permutationscores if x < score])
158   
159    return (float(amount_perm_bigger_score) / float(len(permutationscores)))
160
161
162def calculate_effective_snps(n_snps, perm):
163    """
164    in R:
165    nEff=trunc(nSNPs*nSNPs*0.189/var(Perms))
166    """
167    effective_snps = -1
168    variance = common.variance(perm)
169    if (variance == "NA"):
170        effective_snps = "NA"
171    elif(variance == 0.0):
172        effective_snps = "NA"
173    else:
174        effective_snps = (int(float(n_snps) * float(n_snps) * 0.189 / variance))
175    return(effective_snps)
176
177
178def read_permutated_results(permfile):
179    """
180    read the permutated results of a a permutation file and return the
181    header and file as list of lists
182    """
183    file_handle = common.getfile_handle(permfile)
184    text_as_list = file_handle.readline()
185    header = text_as_list.strip().split("\t")
186    #create empty list of list to store columns
187    results = [[] for i in range(len(header))]
188    for line in file_handle:
189        splitted_line = line.strip().split("\t")
190        #add group name from first column
191        results[0].append(splitted_line.pop(0))
192        for i , value in enumerate(splitted_line, 1):
193            results[i].append(float(value))
194
195    return (header, results)
196
Note: See TracBrowser for help on using the repository browser.