source: trunk/brs2010p35/src/jag/plink.py @ 2

Last change on this file since 2 was 2, checked in by tim.te.beek@…, 8 years ago

Add revision 338 snapshot of previous SVN instance, omitting PLINK binaries

File size: 9.2 KB
Line 
1'''
2Created on Dec 16, 2010
3
4@author: maartenk, esther lips
5
6Call the Plink program and return raw results as pathname or as score
7'''
8
9import logging as log
10import jag.common as common
11import jag.permutationresults as permutationresults
12import os
13import platform
14import re
15import subprocess
16import sys
17import tempfile
18
19
20class Plink(object):
21    """
22    handles input and output from plink
23    """
24    def __init__(self):
25        self.bfile = None
26        self.pheno_file = None
27        self.covar_file = None
28        self.plinkcommand = ""
29        self.snp_to_group_map = ""
30        self.arguments = None
31        self.switches = " "
32        self.phenotypes_present = []
33        self.verbose = False
34        self.print_command = False
35        self.permutations_completed = 0
36
37
38    def set_plink_arguments(self, arguments):
39        """
40        set arguments for plink except bfile and output
41        """
42        if(len(arguments) > 2):
43            self.arguments = arguments
44
45
46    def set_snp_to_group_map(self, snp_to_group_map):
47        """
48        check snp_to_group_map is accessible for the program
49        """
50        self.snp_to_group_map = snp_to_group_map
51
52
53    def get_number_of_pheno_types(self):
54        """
55        Get the number of phenotypes based on the
56        number of columns in the phenofile
57        """
58        if (self.pheno_file != None):
59            pheno = common.read_pheno_file(self.pheno_file)
60            total_columns = len(pheno[1][1].split("\t"))
61        else:
62            total_columns = 1
63        return (total_columns)
64
65
66    def _print_assoc_state(self, process, nr_of_phenotypes):
67        """
68        filter output when running a assoc analysis in Plink
69        """
70        output_of_plink = ""
71        test = re.compile("\.P(\d{0,4}){0,1}\.(q){0,1}assoc\s")
72        for line in iter(process.stdout.readline, ''):
73            console_out = line.rstrip()
74            match = test.search(console_out)
75           
76            if (match != None):
77                column_number = int(match.groups()[0])
78                if (nr_of_phenotypes != 0):
79                    if (column_number % nr_of_phenotypes == 0):
80                        perm_this_run = column_number / nr_of_phenotypes
81                        permutation_completed = str(self.permutations_completed + perm_this_run)
82                        log.info("permutation " + permutation_completed + " ready")
83                else:
84                    log.info("Ready with phenotype" + str(column_number))
85               
86            output_of_plink += str(console_out) + "\n"
87
88        return output_of_plink
89
90
91    def run_plink(self):
92        """
93        run a instance of plink
94        """
95        outfile = tempfile.mkstemp()
96        # select right function of plink
97        command = _find_os_right_plink()
98        #remove assoc option when logistic or lineaer are set
99        if (self.switches.count("--logistic") >= 1):
100            self.arguments = self.arguments.replace("--assoc", "")
101            #remove adust when logistic and covar is set
102            if (self.arguments.count("--covar") >= 1):
103                self.arguments = self.arguments.replace("--adjust", "")
104        if (self.switches.count("--linear") >= 1):
105            self.arguments = self.arguments.replace("--assoc", "")
106            #remove adust when linear and covar is set
107            if (self.arguments.count("--covar") >= 1):
108                self.arguments = self.arguments.replace("--adjust", "")
109
110        command += " --noweb " + self.arguments + self.switches + " --out " + outfile[1]
111        prefix = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
112        command = prefix + command
113       
114        process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
115       
116        if (self.print_command):
117            log.info(self.arguments + self.switches + "\n")
118   
119        output_of_plink = ""
120        nr_of_phenotypes = len(self.phenotypes_present) # length is zero
121   
122        #nice output for assoc files
123        if(re.search("--assoc", self.arguments)):
124            output_of_plink = self._print_assoc_state(process, nr_of_phenotypes)
125        elif(re.search("--indep-pairwise", self.arguments)):
126            output_of_plink = _print_indep_pairwaise_state(process)
127        else:
128            log.info("no output filtering found for command" + self.arguments)
129            for line in iter(process.stdout.readline, ''):
130                log.info(line.strip())
131                output_of_plink += line
132       
133        if (self.verbose):
134            log.info(output_of_plink)
135        #check if plink is finished correctly by checking the output of plink "Analysis finished"
136        if not re.search("Analysis finished", output_of_plink):
137            log.info("PLINK did not finish analysis. Job is terminated.")
138            log.info(output_of_plink)
139            sys.exit(1)
140        return(outfile[1])
141
142
143
144    def point_outputfile_to_phenotype(self, seeds, resultfile, results_extension):
145        """
146        map the files from the permutation phenotypes to the right permutation
147        """
148        resultfiles = {}
149        amount_of_phenotypes = len(self.phenotypes_present)
150        #print("amount_of_phenotypes"+str(amount_of_phenotypes))
151        for phenotype in self.phenotypes_present:
152            phenotype=int(phenotype)
153            resultfiles[phenotype] = []
154            range_until=(len(seeds)*amount_of_phenotypes)
155            #the second number in range excludes the range : it does not includes the border
156            if (phenotype==amount_of_phenotypes):
157                range_until+=1
158         
159            numbers=range(phenotype,range_until,amount_of_phenotypes)
160            resultfiles[phenotype]= [resultfile+".P"+str(number)+"."+results_extension for number in numbers]
161           
162       
163        return resultfiles
164
165
166    def run_permutation(self, seeds):
167        """
168        performs permutations
169        """
170#        #print("SEEDS", str(seeds))
171        permuted_pheno_file = common.create_pheno_permutation_file(self, seeds)
172        pheno_arg = " --pheno " + str(permuted_pheno_file + " --all-pheno")
173
174        if self.covar_file:
175            permuted_covar_file = common.create_covar_permutation_file(self, seeds)
176            pheno_arg += " --covar " + permuted_covar_file + " --hide-covar"
177            self.set_plink_arguments("--bfile " + self.bfile + pheno_arg)
178        else:
179            self.set_plink_arguments("--bfile " + self.bfile + " --assoc " + pheno_arg)
180
181
182        resultfile = self.run_plink() # run plink
183        results_extension = common.plink_results_extention(resultfile)
184        resultfiles = self.point_outputfile_to_phenotype(seeds, resultfile, results_extension)
185        #print(str(resultfiles))
186        results = extract_permutated_scores(self.snp_to_group_map, resultfiles, seeds)
187                           
188        os.remove(permuted_pheno_file)  #remove permuted pheno file
189        common.remove_plink_output(resultfile)  # remove all output from plink
190     
191        #print(str(results))
192        return results
193
194
195    def run_permutation_single_core(self, jobmap):
196        """
197        run plink on a single cpu
198        """
199        permutations = []
200       
201        for seeds in jobmap:
202            permutations.append(self.run_permutation(seeds))
203            self.permutations_completed += len(seeds)
204   
205        return permutations
206
207
208def extract_permutated_scores(snp_to_group_map, resultfiles, seeds):
209    """
210    extract permuted scores from a assoc files
211    """
212    permuted_results = {}
213   
214    for pheno, files in resultfiles.iteritems():
215        clusterresults = permutationresults.PermutatedResults()
216       
217        for resultfile in files:
218            filehandle = common.getfile_handle(resultfile)
219           
220            for line in filehandle:
221                splitted = line.split()
222                snp_id = splitted[1]
223                p_value = splitted[8]
224               
225                if (snp_id in snp_to_group_map):
226                    clusterresults.add_snp_to_cluster(p_value, snp_to_group_map[snp_id])
227
228            clusterresults.process_permutation()
229
230        clusterresults.add_seeds(seeds)
231        permuted_results[pheno] = clusterresults
232       
233    return permuted_results
234
235
236def _find_os_right_plink():
237    """
238    Give the right path to plink depending on OS
239    """
240    command = ""
241    if(sys.platform == "linux2"):
242        bits = platform.architecture()[0]
243        if (bits == "64bit"):
244            command = "/../bin/plink-1.07-x86_64/plink"
245        elif(bits == "32bit"):
246            sys.exit("32 bit version of linux not supported")
247        else:
248            sys.exit("unknown version of linux")
249    elif(sys.platform == "darwin"):
250        command = "/../bin/plink-1.07-mac-intel/plink"
251    elif(sys.platform == "win32"):
252        sys.exit("Windows not supported")
253
254    return command
255
256
257def _print_indep_pairwaise_state(process):
258    """
259    filter output when running an indep_pairwaise in Plink
260    """
261    output_of_plink = ""
262    test = re.compile("\For chromosome (\d*),")
263    for line in iter(process.stdout.readline, ''):
264        console_out = line.rstrip()
265        match = test.search(console_out)
266        if (match != None):
267            log.info ("Pruned independent SNPs on chromosome " + match.groups()[0] + " ")
268
269        output_of_plink += str(console_out) + "\n"
270
271    return output_of_plink
Note: See TracBrowser for help on using the repository browser.