source: trunk/brs2010p35/src/jag/association_analysis.py @ 2

Last change on this file since 2 was 2, checked in by tim.te.beek@…, 8 years ago

Add revision 338 snapshot of previous SVN instance, omitting PLINK binaries

File size: 5.6 KB
Line 
1'''
2Created on Dec 13, 2010
3
4@author: maartenk, esther lips
5
6Script performs association analysis/self-contained test.
7
8Saves result from plink as results.{.PhenotypeNumber}.assoc
9
10'''
11
12import logging as log
13import glob
14import jag.clusterresults as CR
15import jag.common as common
16import jag.file_fetch_and_write as in_out
17import os
18import jag.plot_with_r as plot_with_r
19import sys
20
21
22
23class AssociationAnalysis(object):
24    '''
25    Class to group Association Analysis methods.
26    '''
27
28    def __init__(self, inout):
29        self.in_and_out = inout
30        self.files = {}
31
32
33    def get_aa_results(self, plinkin, group, gene_based=False):
34        """
35        check if there is already sumlog*.log in current dir and return as cluster
36        results file. If files are not present create the files
37
38        Returns list of cluster results objects
39        """
40
41        results_files = self.in_and_out.get_sumlog_filenames()
42        aa_results = []
43       
44        if (len(results_files) > 0):
45            for i in range(len(results_files)):
46                if(os.path.exists(results_files[i])):
47                    file_handle = common.getfile_handle(results_files[i])   #read file
48                    aa_result = CR.Clusterresults()
49                    aa_result.read_formated_results(file_handle)
50                   
51                    if (aa_result.group_path == group):
52                        aa_results.append(aa_result)
53                    else:
54                        log.info("sumlog file does not have identical path to group file\n"
55                            + "Will run Association analysis again")
56                        aa_result = self.run_step1(plinkin, group, gene_based)
57                        clusterresults = [pheno_dict["clusterresults"] for  pheno_dict in aa_result.values()]
58                        aa_results.extend(clusterresults)
59                        break
60                else:
61                    aa_result = self.run_step1(plinkin, group, gene_based)
62                    clusterresults = [pheno_dict["clusterresults"] for pheno_dict in aa_result.values()]
63                    aa_results.extend(clusterresults)
64        else:
65
66            clusterresults = [pheno_dict["clusterresults"] for  pheno_dict in self.run_step1(plinkin, group, gene_based).values()]
67            aa_results = clusterresults
68
69        return aa_results
70
71
72    def run_step1(self, plink, group, gene_group=False, adjusted=False):
73        """
74        main function of association analysis. this function calls all function
75        needed for aa analysis.
76        plink. a plink object. Example to create this kind of object can be found in test/test_jag.py
77        group: path to file which contains file mapping
78        """
79        #check files are available
80        #read clusters of genes as dictonary (key=snp, value is clustername)
81        if (not group == None):
82            snptogroup = common.map_snp_to_group(group, gene_group)
83                   
84        pheno_arg = ""
85       
86        if (not plink.pheno_file == None):
87            pheno_arg = " --pheno " + str(plink.pheno_file + " --all-pheno ")
88
89        if not plink.covar_file == None:
90            pheno_arg += " --covar " + plink.covar_file + " --hide-covar "
91            plink.set_plink_arguments("--bfile " + plink.bfile + " " + pheno_arg)
92       
93        else:
94            plink.set_plink_arguments("--bfile " + plink.bfile + " --assoc " + pheno_arg)
95       
96        resultfile = plink.run_plink()
97
98        assoc_files = glob.glob(resultfile + "*.*assoc*")
99               
100        if adjusted:
101            assoc_files = [assoc_file for assoc_file in assoc_files if assoc_file.endswith("adjusted")]
102               
103        else:
104            assoc_files = [assoc_file for assoc_file in assoc_files if not assoc_file.endswith("adjusted")]
105         
106        for assoc_file in assoc_files:  #    'get relevant p values'
107            clusterresults = CR.Clusterresults()            #    read values from assoc assoc_file and lookup in snptogroup map:
108            clusterresults.pheno_path = plink.pheno_file
109            clusterresults.total_columns = plink.get_number_of_pheno_types()
110            clusterresults.column_number = str(in_out.get_assoc_file_number(resultfile, assoc_file))
111            self.files[clusterresults.column_number] = {}
112            clusterresults.bfile_path = plink.bfile
113            clusterresults.group_path = group
114
115            moved_assoc_file = self.in_and_out.copy_assoc_file(assoc_file)
116           
117            if (not group == None):
118                clusterresults.map_p_values_from_assoc_file(snptogroup, assoc_file, adjusted)      # extract need p values from asoc file
119                self.files[clusterresults.column_number]["raw_p_files"] = clusterresults.write_all_pvalues()
120                clusterresults.createcleanresults()
121                self.files[clusterresults.column_number]["clusterresults"] = clusterresults
122                self.in_and_out.save_sumlog(assoc_file, clusterresults) #'generate sumlog.out'
123               
124            else:
125                log.info ("No group file is set: no sumlog file is made.")
126                sys.exit("JAG is terminated.")
127                           
128            self.files[clusterresults.column_number]["assoc_files"] = moved_assoc_file              #saving full filepathfor creating plot later on
129
130        common.remove_plink_output(resultfile)          # remove all output from plink
131       
132        if self.in_and_out.run_rproject:
133            plotter = plot_with_r.call_r(self.in_and_out)
134            plotter.draw_qq_plots(self.files, adjusted)
135            plotter.draw_qq_plots_for_sets(self.files)
136
137        return self.files
Note: See TracBrowser for help on using the repository browser.