source: trunk/brs2010p35/src/jag/clusterresults.py @ 2

Last change on this file since 2 was 2, checked in by tim.te.beek@…, 8 years ago

Add revision 338 snapshot of previous SVN instance, omitting PLINK binaries

File size: 6.2 KB
Line 
1'''
2Created on Dec 14, 2010
3
4@author: maartenk, esther lips
5
6Stores, reads and write results and calculate concatenated test statistics
7'''
8
9import jag.common as common
10import sys
11import logging as log
12
13
14
15class Clusterresults(object):
16    '''
17    Stores and reads and write results and calculate concatenated scores
18    '''
19
20    def __init__(self):
21        self.cluster_results = {}
22        self.column_number = None
23        self.total_columns = None
24        self.pheno_path = None
25        self.group_path = None
26        self.bfile_path = None
27
28        self.clean_results = {}
29
30
31    def add_snp_to_cluster_all_info(self, p_value, snp_id, gene_and_cluster_list):
32        """
33        add a P value found in PLINK output to the cluster in gene_and_cluster_list
34        """
35        for cluster in gene_and_cluster_list:
36            if  not cluster["c"] in self.cluster_results:
37                self.cluster_results[cluster["c"]] = {"genes":[], "snp":[], "p-value":[]}
38               
39            self.cluster_results[cluster["c"]]["genes"].append(cluster["g"])
40            self.cluster_results[cluster["c"]]["snp"].append(snp_id)
41           
42            try:
43                self.cluster_results[cluster["c"]]["p-value"].append(float(p_value))
44               
45            except ValueError:
46                log.info("ERROR: could not transform the following p-value to a number: " \
47                      + str(p_value))
48                if (p_value == "P"):
49                    log.info("The group file should not contain a header")
50
51                sys.exit(1)
52
53
54    def write_all_pvalues(self):
55        """
56        Write all P values and return file containing P-values each on a new line.
57        """
58       
59        message = []
60        for group  in self.cluster_results.iterkeys():
61            pvalues = self.cluster_results[group]["p-value"]
62            message.extend([group + "\t" + str(pvalue) for pvalue in pvalues])
63
64        filename = common.write_text_to_tempfile("\n".join(message))
65
66        return(filename)
67
68
69    def map_p_values_from_assoc_file(self, snptogroup, assoc_file, adjusted=False):
70        """
71        extracts P-values from a assoc file and adds them based on the
72        snptogroup mapping to the right gene-set
73       
74        """
75        file_handle = common.getfile_handle(assoc_file)
76       
77        for line in file_handle:
78            splitted = line.split()
79            snp_id = splitted[1]
80           
81            if adjusted:
82                p_value = splitted[3]
83                               
84            else:
85                p_value = splitted[8]
86                               
87            if (snp_id in snptogroup):
88                self.add_snp_to_cluster_all_info(p_value, snp_id, snptogroup[snp_id])
89
90
91    def createcleanresults(self):
92        """
93        summarize cluster_results and save it in self.clean_results
94        """
95
96        for cluster in self.cluster_results.iterkeys():
97            curren_cluster = self.cluster_results[cluster]
98            n_snps = len(set(curren_cluster["snp"]))
99            n_genes = len(set((curren_cluster["genes"])))
100            nsumlog10 = common.negsumlog10(curren_cluster["p-value"])
101
102            self.clean_results[cluster] = {"n_snps":n_snps,
103                                        "n_genes":n_genes,
104                                        "sumneglog10":nsumlog10}
105        return(self.clean_results)
106
107
108    def get_sum_neglog_10_score(self, group):
109        """
110        retrieve sum neglog 10 value if not found return -1
111        """
112        score = -1
113        if group in self.clean_results:
114            score = self.clean_results[group]["sumneglog10"]
115        else:
116            sys.exit("could not find neglog_10 for group named" + str(group))
117        return score
118
119
120    def get_n_snps(self, group):
121        """
122        retrieve amount of snps if not found return -1
123
124        """
125        n_snp = -1
126        if group in self.clean_results:
127            n_snp = self.clean_results[group]["n_snps"]
128        else:
129            sys.exit("could not find score for group named" + str(group))
130        return n_snp
131
132
133    def get_n_genes_of_group(self, group):
134        """
135        get number of genes that is in the group
136        """
137        n_genes = -1
138        if group in self.clean_results:
139            n_genes = self.clean_results[group]["n_genes"]
140        else:
141            sys.exit("could not find score for group named " + str(group))
142        return n_genes
143
144
145    def format_results(self):
146        """
147        format the results in a way that a user can read AND read_formated_results can read
148        """
149
150        formated_text = "phenotype #\t" + str(self.column_number)
151        formated_text += "\n# of phenotypes in pheno file\t" + str(self.total_columns)
152        formated_text += "\npheno file path\t" + str(self.pheno_path)
153        formated_text += "\ngroup file path\t" + str(self.group_path)
154        formated_text += "\nbfile path\t" + str(self.bfile_path)
155
156        formated_text += "\nGroup\tnSNPs\tnGENES\tsumlog10(p)\n"
157        for cluster in sorted(self.clean_results.iterkeys()):
158            current_cluster = self.clean_results[cluster]
159            formated_text += str(cluster)
160            formated_text += "\t" + str(current_cluster["n_snps"])
161            formated_text += "\t" + str(current_cluster["n_genes"])
162            formated_text += "\t" + str(current_cluster["sumneglog10"]) + "\n"
163        return (formated_text)
164
165
166    def read_formated_results(self, file_handle):
167        """
168        import the results saved by format_results() and populate this class
169        """
170
171        text_as_list = file_handle.readlines()
172
173        if len(text_as_list) == 0:
174            raise AssertionError
175        self.column_number = text_as_list[0].strip().split("\t")[1]
176        self.total_columns = text_as_list[1].strip().split("\t")[1]
177        self.pheno_path = text_as_list[2].strip().split("\t")[1]
178        self.group_path = text_as_list[3].strip().split("\t")[1]
179        self.bfile_path = text_as_list[4].strip().split("\t")[1]
180
181        text_clean = [c.split("\t") for c in[ l.strip() for l in text_as_list[5:]]]
182        for i in range(1, len(text_clean)):
183            self.clean_results[text_clean[i][0]] = {"n_snps":text_clean[i][1],
184                                        "n_genes":text_clean[i][2],
185                                        "sumneglog10":text_clean[i][3]}
186
187
188
Note: See TracBrowser for help on using the repository browser.