Changeset 21


Ignore:
Timestamp:
Jul 11, 2012, 8:11:19 PM (4 years ago)
Author:
e.s.lips@…
Message:
 
Location:
trunk/src/jag
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/jag/association_analysis.py

    r20 r21  
    118118                clusterresults.createcleanresults()
    119119                self.files[clusterresults.column_number]["clusterresults"] = clusterresults
    120                 self.in_and_out.save_sumlog(assoc_file, clusterresults) #'generate sumlog.out'
     120                self.in_and_out.save_sumlog(assoc_file, clusterresults) #'generate .sumlog'
    121121           
    122122            else:
  • trunk/src/jag/clusterresults.py

    r20 r21  
    4040            self.cluster_results[cluster["c"]]["snp"].append(snp_id)
    4141           
    42             try:
    43                 self.cluster_results[cluster["c"]]["p-value"].append(float(p_value))
     42            if(p_value == "NA"):
     43                p_value = 1
     44               
     45            self.cluster_results[cluster["c"]]["p-value"].append(float(p_value))
    4446               
    45             except ValueError:
    46                 log.info("A problem occured: could not transform the following p-value to a number: " \
    47                       + str(p_value))
    48                 if (p_value == "P"):
    49                     log.info("The group file should not contain a header")
    50 
    51                 sys.exit(1)
     47            if (p_value == "P"):
     48                log.info("The set (--set) file should not contain a header")
     49                sys.exit("\nTerminated analysis: " + endtime + "\n")
     50 
    5251
    5352
     
    116115            score = self.clean_results[group]["sumneglog10"]
    117116        else:
    118             sys.exit("could not find neglog_10 for group named" + str(group))
     117            sys.exit("could not find -log10 for set with name " + str(group))
    119118        return score
    120119
     
    129128            n_snp = self.clean_results[group]["n_snps"]
    130129        else:
    131             sys.exit("could not find score for group named" + str(group))
     130            sys.exit("could not find score for set with name " + str(group))
    132131        return n_snp
    133132
     
    141140            n_genes = self.clean_results[group]["n_genes"]
    142141        else:
    143             sys.exit("could not find score for group named " + str(group))
     142            sys.exit("could not find score for set with name " + str(group))
    144143        return n_genes
    145144
     
    186185
    187186        text_clean = [c.split("\t") for c in[ l.strip() for l in text_as_list[5:]]]
    188         #print (str(text_clean) + "\n")
    189187       
    190188        for i in range(1, len(text_clean)):
    191                    
    192189            self.clean_results[text_clean[i][0]] = {"n_snps":text_clean[i][1],
    193190                                        "n_genes":text_clean[i][2],
  • trunk/src/jag/common.py

    r20 r21  
    6666        path = path[:-4]
    6767
    68     getfile_handle(path + ".fam" , "This is set is --bfile variable", False)
    69     getfile_handle(path + ".bed" , "This is set is --bfile variable", False)
    70     getfile_handle(path + ".bim" , "This is set is --bfile variable", False)
     68    getfile_handle(path + ".fam" , "This is --bfile variable", False)
     69    getfile_handle(path + ".bed" , "This is --bfile variable", False)
     70    getfile_handle(path + ".bim" , "This is --bfile variable", False)
    7171
    7272    return path
     
    9292   
    9393    except IOError, error:
    94         log.info("\n" +  "\nCannot find " + path) #+ message)
     94        log.info("\n" +  "\nCannot find " + path)
    9595        if verbose:
    9696            raise error
     
    125125     equal to R-projects var() implementation
    126126    """
    127     #function needs at least 1 number to work
    128127    if not isinstance(numbers, list):
    129128        raise AssertionError
     
    262261    """
    263262    """
    264     # hier flaggen (aan/uit van shufflen van covariaten )
    265263    covar_perm_clean, start_of_lines = extract_pheno_from_covar_file(plink.covar_file)
    266264    filename = make_shuffled_phenotype(seeds, covar_perm_clean, start_of_lines)
    267265    return (filename)
    268     # return (covar_perm_clean) # christiaans oplossing (30 mei 2012)
    269266   
    270267
     
    300297    """
    301298    read the mapping from snp id to gene and clusternumber
    302      a  mapping file has 3 columns:
    303     column 1:snp id (rs number)
    304     column 2: gene name
    305     column 3: geneset name
    306    
    307299    the output is a dict with primary key the snp name
    308300    each value in the primary dict consist out of a dict with a gene (key="g")
     
    318310    for line in file_handle:
    319311        splitted = line.split()
    320         #print len(splitted)
    321312        #assign variables to descriptive variable name
    322313        if (len(splitted) > 2):
     
    350341def map_geneset_to_snp(geneset):
    351342    """
    352     read the mapping from snp id to gene and clusternumber
    353      a  mapping file has 3 columns:
    354     column 1:snp id (rs number)
    355     column 2: gene name
    356     column 3: geneset name
    357    
     343    read the mapping from snp id to gene and geneset number
    358344    the output is a dict with primary key the snp name
    359345    each value in the primary dict consist out of a dict with a gene (key="g")
    360      and a cluster number (key="c")
     346    and a geneset number (key="c")
    361347    """
    362348    if len(geneset) == 0:
     
    364350    file_handle = getfile_handle(geneset)
    365351    snptogeneset = {}
    366     #read the cluster of genes file
    367352   
    368353    for line in file_handle:
     
    386371    numbers = array.array('d', numbers)
    387372    neg_sum_log10_value = sum([ math.log10(x) for x in numbers])
    388     #this should be -1 * neg_sum_log: abs prevents to write -0.0
    389373    return abs(neg_sum_log10_value)
    390374
  • trunk/src/jag/drawrandom.py

    r20 r21  
    11"""
    2 performs step 4 of the jag pipeline
     2Make random draws
    33
    44@author: maartenk, esther lips
     
    8181            log.info(_get_terminated_time())
    8282            sys.exit()
    83         #load amount of  genes in geneset by the mapping file
     83        #load number of  genes in geneset by the mapping file
    8484        self.exclude_genes = set([ x["g"] for x in geneset_to_snp_mapping[geneset]])
    8585
     
    9696       
    9797       
    98         log.info("\nSize of gene pool (--all_genes): " + str(len(all_genes)) + " unique genes")
     98        log.info("\nSize of gene pool (--pool): " + str(len(all_genes)) + " unique genes")
    9999        log.info("\nThe geneset " + geneset + " contains " + str(ngenes) + " genes" )
    100100        log.info("\nMinimum size of gene pool needed to draw " + str(amount) + " gene-sets (" + str(ngenes) + "*" +  str(amount) + "): " + str(ngenes*amount))
     
    112112
    113113            for gene_as_key in sampled_keys:
    114                 #draw new_set_number random index
    115114                output_text += '\n'.join([str(snp) + "\t" + str(gene_as_key) + \
    116115                        "\t" + setname for snp in gene2snp[gene_as_key]])
     
    118117
    119118        incl_or_excl = "excl" if self.drawrandom.exclude else "incl"      #create correct filename
    120         #filename = "draws_ngenes_" + incl_or_excl + "_" + str(geneset) + ".set"
    121         filename = "draws_ngenes.set"
     119        filename = "draws_ngenes.set.annot"
    122120        out = self.drawrandom.inoutput.save_text_to_filename(filename, output_text)     #save file
    123121        log.info("\nSaved random draws on number of genes as " + out)
     
    167165                outfile_text += rs_number + '\t' + str(snp2gene_mapping[rs_number]) + "\n"  #add mapping
    168166            except KeyError:
    169                 outfile_text += str(rs_number) + "\t" + " " + "\n"  #if there is no mapping found add a whitespace(" ")
     167                outfile_text += str(rs_number) + "\t" + " " + "\n" 
    170168       
    171169        outfile = self.drawrandom.inoutput.save_text_to_filename("prune.in", outfile_text)  #save mapped prune file to prunefile
     
    214212    def snp(self, geneset, amount, empp_file, plink, out):
    215213        """
    216         draw random snp, on number of neff snps in the empp.out file, from an independent snp file (.prune.in). If this file is not
     214        draw random snp, on number of neff snps in the .empp file, from an independent snp file (.prune.in). If this file is not
    217215        present, this file will be created within _create_indep_snp_file function.
    218216       
     
    323321            incl_or_excl = "excl"
    324322       
    325         #filename = "draws_neff_" + inregion_text + "_" + incl_or_excl + "_" + str(geneset) + ".set"
    326         filename = "draws_neff_" + inregion_text + ".set"
    327         #print(filename)
     323        filename = "draws_neff_" + inregion_text + ".set.annot"
    328324        out = self.drawrandom.inoutput.save_text_to_filename(filename, random_snp_text)     #save random snps file
    329         #print(out)
    330325        log.info("\nSaved random draws on number of effective number of SNPS as " + out)
    331326        return(filename)
  • trunk/src/jag/file_fetch_and_write.py

    r20 r21  
    4141                    self.out = full_path + "."
    4242            else:
    43                 print("Jag stopped")
     43                print("Jag has stopped.")
    4444                sys.exit(1)
    4545   
     
    5151        save sumlog file to disk.
    5252        """
    53 
     53       
    5454        output_number = get_number_of_assoc_file(assoc_file)
     55        output_number= str(output_number[1:])
     56       
    5557        filename = self.create_assoc_analysis_filename(output_number)
    5658        results_as_text = clusterresults.format_results()
     
    116118        create a full path for a assoc analysis file based on number
    117119        """
    118         return self.out + "sumlog" + str(number) + ".out"
     120       
     121        return self.out + str(number) + ".sumlog"
    119122
    120123
     
    122125        """
    123126        get association analysis filename from the current directory
    124         with a format like sumlog*.out
    125         """
    126 
    127         files = glob.glob(self.out + "sumlog*.out")
    128         test = re.compile("sumlog(\.[Pp]\d{0,3}){0,1}\.out$")
     127        with a format like *.sumlog
     128        """
     129
     130        files = glob.glob(self.out + "*.sumlog")
     131        test = re.compile("(\.[Pp]\d{0,3}){0,1}\.sumlog$")
    129132        files_clean = [x for x in files if test.search(x) != None]
    130133
  • trunk/src/jag/jag_main.py

    r20 r21  
    1717
    1818sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + "/src/")
    19 #next is only for development reasons (when run as python jag_main.py)
    20 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + "/../")
    2119
    2220import jag.common as common
     
    5250   
    5351    return("""
    54     ####################################################
    55     #                                                  #
    56     #     JAG  - Joint Association of Genes - V1.0     #   
    57     #    (C) 2011-2012 Esther Lips & Maarten Kooyman   #
    58     #         GNU General Public License, v2           #
    59     #                                                  #   
    60     #            Complex Trait Genetics Lab            #
    61     #           http://ctglab.nl/software/jag/         #
    62     #                                                  #
    63     ####################################################
     52    ##############################################################
     53    #                                                            #
     54    #     JAG  - Joint Association of Genetic variants - V1.0    #   
     55    #            2012,  Esther Lips & Maarten Kooyman            #
     56    #              GNU General Public License, v2                #
     57    #                                                            #   
     58    #                Complex Trait Genetics Lab                  #
     59    #               http://ctglab.nl/software/jag/               #
     60    #                                                            #
     61    ##############################################################
    6462    """)
    6563
     
    7169    """
    7270    return("""
    73     === MAPPING SNPs TO GENES ===
    74     --annotate         file with genes to map
     71    === SNP2GENE ANNOTATION ===
     72    --snp2gene         file with genes to map
    7573    --up               size upstream region in kb
    7674    --down             size downstream region in kb
    77     --gene_boundaries  file with gene boundaries
    78     --snp_file         file with snps
     75    --gene_loc        file with chromosomal locations of genes
     76    --snp_loc         file with chromosomal location of snps
    7977     
    80     === ASSOCIATION ANALYSIS ===
    81     performs association analysis + permutations
     78    === SELF-CONTAINED TEST ===
    8279    --bfile    -b      path of fam, bim and bed file without file extension
    83     --set    -g          file with genesets & snps
     80    --set      -g      file with snp annotated genesets
    8481    --perm     -m      number of permutations
    85     --no_emp           do not calculate empirical P-value
     82    --no_emp           disable calculation of empirical P-value
    8683    --seed     -s      seed for permutations
    8784   
    88     == OPTION FOR ASSOCIATION TEST ==
    89     --verbose  -v      print output of every plink file
    90     --pheno    -p      add a file with phenotypes
    91     --out      -o      set a prefix for output file
    92     --logistic         add logistic switch to plink
    93     --linear
    94     --sex
    95     --covar
     85    == OPTIONS FOR SELF-CONTAINED TEST ==
     86    --verbose  -v      print PLINK output
     87    --pheno    -p      alternate phenotype files
     88    --out      -o      prefix for output file
     89    --logistic         perform logistic association test (in PLINK)
     90    --linear           perform logistic association test (in PLINK)
     91    --covar            covariate file
    9692    --gene_based       sets are based on single genes
    9793    --adjust           adjust pvalues of real data with lambda
    98     --no_graph         do not create graphs within R
     94    --no_plots         do not create plots within R
    9995
    10096
     
    109105    --ndraw            number of sets of random genes/SNP sets to be drawn (= integer )
    110106    --set              geneset file
    111     --all_genes         list with all genes and SNPs
     107    --pool         list with all genes and SNPs
    112108   
    113109    ==switch==
    114     --ngenes           select functional set by set name
    115     --neff_all         {set name}        select all SNPs default
    116     --neff_genic       {set name}        only select SNPs located within genes
    117     --neff_intergenic  {set name}        only select SNPs located outside genes
     110    --draw_ngenes           select functional set by set name
     111    --draw_neff_all         {set name}        select all SNPs default
     112    --draw_neff_genic       {set name}        only select SNPs located within genes
     113    --draw_neff_intergenic  {set name}        only select SNPs located outside genes
    118114 
    119115
     
    123119   
    124120    mandatory:
    125     --exclude_group (default enable)
    126     --include_group   
     121    --include   
    127122
    128123    === CALCULATE EMPP OF EMPP ===
     
    132127
    133128   ===examples===:
    134    association test:         jag -bfile --bfile ../tutorial_data/example --set jag.set --perm 10 --out test
    135                              jag -bfile hapmap/hapmap_CEU_r23_60 -set hapmap/hapmap_group.txt --pheno hapmap/hapmap_pheno.txt -m 100 --out test
    136    self contained test       jag --merge test --out test
    137    random draws              jag --ndraw 20 --bfile ../tutorial_data/example --set jag.set --neff_genic hsa04080 --neff_calc jag.empp.P1.out --all_genes jag.all_genes --out test
    138                              jag --ndraw 50 --ngenes hsa04080 --set jag.set --all_genes jag.allgenes --out test
    139    competitive test          jag --orig_empp test2.empp.merged.P1.out --control_empp draw_neff_genic.empp.P1.out --gene_set hsa04080
     129   snp2gene annotation       jag --snp2gene --gene_loc hg18/hg18.gene.loc --snp_loc hg18/hg18.snp.loc
     130   self-contained test:      jag --bfile example_data/example --set jag.set.annot --perm 10
     131   merge data                jag --merge test --out test
     132   random draws              jag --ndraw 20 --bfile example_data/example --set jag.set.annot --draw_neff_genic hsa04080 --neff_calc jag.P1.empp --pool jag.allgenes.annot
     133                             jag --ndraw 50 --draw_ngenes hsa04080 --set jag.set.annot --pool jag.allgenes.annot
     134   competitive test          jag --orig_empp jag.merged.P1.empp --control_empp jag.draw_neff_genic.P1.empp --gene_set hsa04080
    140135    """)
    141136
     
    147142    """
    148143   
    149     def _step0_annotate_genes(self, out, annotate_file, up_size, down_size, gene_boundaries, snp_file):
    150         """
    151         --all_genes initial coding genes
    152         --up    size upstream region in kb
     144    def _step0_annotate_genes(self, out, annotate_file, up_size, down_size, gene_loc, snp_loc):
     145        """
     146        --snp2gene file with geneset(s)
     147        --up       size upstream region in kb
    153148        --down     size downstream region in kb
    154         --gene-boundaries    file with gene boundaries
    155         --snp_file    file with snps
     149        --gene_loc    file with gene boundaries
     150        --snp_loc    file with snps
    156151        """
    157152   
    158153        import jag.snp2geneset as snp2geneset
    159         all_gene2snp, geneset2snp = snp2geneset.snp2geneset(annotate_file, up_size, down_size, gene_boundaries, snp_file)
    160        
    161         mapping_as_string = snp2geneset.format_mapping(geneset2snp)
     154        all_gene2snp, geneset2snp = snp2geneset.snp2geneset(annotate_file, up_size, down_size, gene_loc, snp_loc)
     155       
     156        set_as_string = snp2geneset.format_mapping(geneset2snp)
    162157        all_genes_as_string = snp2geneset.format_mapping(all_gene2snp)
    163158       
    164         out_sets = out.save_text_to_filename("set", mapping_as_string)
    165         out_all = out.save_text_to_filename("allgenes", all_genes_as_string)
    166        
    167         #print(out_all)
     159        out_sets = out.save_text_to_filename("set.annot", set_as_string)
     160        out_all = out.save_text_to_filename("allgenes.annot", all_genes_as_string)
     161   
    168162        log.info("Saved snps2allgenes file as " + out_all)
    169163        log.info("Saved snps2geneset file as " + out_sets)
     
    214208
    215209
    216     def _step4a_draw_genes(self, draw , ngenes, ndraws):
     210    def _step4a_draw_genes(self, draw , draw_ngenes, ndraws):
    217211        """
    218212        draw random sets of genes, based on number of genes in geneset of interest
     
    223217        import jag.drawrandom as drawrandom
    224218        genes = drawrandom.Genes(draw)
    225         genes.genes(ngenes, ndraws)
     219        genes.genes(draw_ngenes, ndraws)
    226220        log.info(_get_endtime())
    227221        sys.exit(0)
     
    298292        else:
    299293            try:
    300                 opts, args = getopt.getopt(args, "p:chb:m:vg:o:s:", ["ndraw=", "ngenes=", "neff_calc=",
    301                     "out=", "snp2gene=", "neff_all=", "neff_intergenic=",
    302                     "neff_genic=", "exclude_group",
    303                     "all_genes=", "snp_file=", "gene_boundaries=", "down=", "up=",
    304                     "include_group", "no_emp", "gene_based", "no_graph", "help",
     294                opts, args = getopt.getopt(args, "p:chb:m:vg:o:s:", ["ndraw=", "draw_ngenes=", "neff_calc=",
     295                    "out=", "snp2gene=", "draw_neff_all=", "draw_neff_intergenic=",
     296                    "draw_neff_genic=", "exclude",
     297                    "pool=", "snp_loc=", "gene_loc=", "down=", "up=",
     298                    "include", "no_emp", "gene_based", "no_plots", "help",
    305299                    "merge=",
    306300                    "verbose", "runplink=",
    307                     "output=", "bfile=", "pheno=", "set=", "perm=", "cluster=", "seed=",
     301                    "bfile=", "pheno=", "set=", "perm=", "seed=",
    308302                    "orig_empp=", "control_empp=", "gene_set=",
    309                     "covar=", "sex", "logistic", "linear", "adjust"])
     303                    "covar=", "logistic", "linear", "adjust"])
    310304           
    311305            except getopt.GetoptError, err:
    312             # print help information and exit:
    313             # will print something like "option -assigned_value not recognized"
    314             # here is print used and no log.info because this is not initialized
    315306                print _get_header_of_program()
    316307                print _get_starttime()
    317308                print ""
    318309                print str(err)
    319                 print "Type ./jag --help for usage."
     310                print "Type jag --help for usage."
    320311                print _get_terminated_time()
    321312                sys.exit(2)
     
    348339        up = float(0)
    349340        down = float(0)
    350         gene_boundaries_file = None
    351         snp_file = None
     341        gene_loc_file = None
     342        snp_loc = None
    352343
    353344        no_emp = False
    354         create_graphs = True
    355         ngenes = False
     345        create_plots = True
     346        draw_ngenes = False
    356347        empirical_p_filename = False
    357348
     
    370361        sims = False
    371362        set_name = False
    372        
    373         #catch out prefix
     363
    374364        out_prefix = "jag"
    375365       
     
    380370               
    381371            elif identifier in ("-s", "--set"):
    382                 match_draw = re.match("\w+(\.draws_n\w*)\.set$", assigned_value)
     372                match_draw = re.match("\w+(\.draws_n\w*)\.set.annot$", assigned_value)
    383373                if match_draw is not None:
    384374                    out_prefix = out_prefix + match_draw.group(1)
     
    392382                out_prefix = out_prefix + ".gene_based"
    393383             
    394         #set input and output prefix first: this is also used for the log   
    395384        from jag.file_fetch_and_write import InAndOut
    396385        inoutput = InAndOut()
    397386        inoutput.set_outfile(out_prefix)
    398387       
    399         #no logging before these statements
    400         self.enable_logging_with_prefix(inoutput)
    401        
    402         #logging is now enabled
     388        self.enable_logging_with_prefix(inoutput)       
    403389        log.info(_get_header_of_program())
    404         log.info("Save log file as [" + inoutput.out + "log]\n")
     390        log.info("Save logfile as [" + inoutput.out + "log]\n")
    405391        log.info(_get_starttime())
    406392       
    407         # print command line options
    408393        log.info("\nUsed options:")
    409394        for o, a in opts:
     
    429414                                       
    430415                elif identifier in ("-s", "--set"):
    431                     #if(assigned_value == ".ndraws_n")
    432                     #    log.info("--set matches" + assigned_value)
    433                     assert common.getfile_handle(assigned_value, \
    434                     "(--set)", verbose)
     416                    assert common.getfile_handle(assigned_value,"(--set)", verbose)
    435417                    group = assigned_value
    436418                    plink.group = assigned_value
     
    443425                    verbose = True
    444426                   
    445                 elif identifier in ("--snp_file"):
    446                     snp_file = assigned_value
     427                elif identifier in ("--snp_loc"):
     428                    snp_loc = assigned_value
    447429                   
    448430                    assert common.getfile_handle(assigned_value, \
    449                                                  "(--snp_file)", verbose)
     431                                                 "(--snp_loc)", verbose)
    450432
    451433                elif identifier in ("--control_empp"):
     
    464446                elif identifier in ("--no_emp"):
    465447                    no_emp = True
    466                     create_graphs = False
    467                    
    468                 elif identifier in ("--no_graph"):
    469                     create_graphs = False
     448                    create_plots = False
     449                   
     450                elif identifier in ("--no_plots"):
     451                    create_plots = False
    470452                   
    471453                elif identifier in ("--ndraw"):
    472454                    ndraws = int(assigned_value)
    473455                   
    474                 elif identifier in ("--ngenes"):
    475                     ngenes = assigned_value
     456                elif identifier in ("--draw_ngenes"):
     457                    draw_ngenes = assigned_value
    476458                   
    477459                elif identifier in ("--snp2gene"):
     
    496478                        sys.exit(1)
    497479               
    498                 elif identifier in ("--gene_boundaries"):
    499                     assert common.getfile_handle(assigned_value, "Cannot find file with gene boundaries (set by --gene_boundaries)", verbose)
    500                     gene_boundaries_file = assigned_value
     480                elif identifier in ("--gene_loc"):
     481                    assert common.getfile_handle(assigned_value, "Cannot find file with gene boundaries (set by --gene_loc)", verbose)
     482                    gene_loc_file = assigned_value
    501483                 
    502                 elif identifier in ("--all_genes"):
     484                elif identifier in ("--pool"):
    503485                    complete_gene_snp_mapping = assigned_value
    504486
    505                 elif identifier in ("--neff_genic"):
     487                elif identifier in ("--draw_neff_genic"):
    506488                    draw_selection = "genic"
    507489                    set_name = assigned_value
    508490                   
    509                 elif identifier in ("--neff_intergenic"):
     491                elif identifier in ("--draw_neff_intergenic"):
    510492                    draw_selection = "intergenic"
    511493                    set_name = assigned_value
    512494                   
    513                 elif identifier in ("--neff_all"):
     495                elif identifier in ("--draw_neff_all"):
    514496                    draw_selection = "all"
    515497                    set_name = assigned_value
     
    519501                         "(--covar;", verbose)
    520502                    plink.covar_file = assigned_value
    521                    
    522                 elif identifier in ("--sex"):
    523                     plink.switches += "--sex "
    524                    
     503                               
    525504                elif identifier in ("--linear"):
    526505                    plink.switches += "--linear "
     
    533512                    adjusted = True
    534513
    535                 elif identifier in ("--exclude_group"):
     514                elif identifier in ("--exclude"):
    536515                    exclude_group = True
    537516                   
    538                 elif identifier in ("--include_group"):
     517                elif identifier in ("--include"):
    539518                    exclude_group = False
    540519
     
    570549            log.info("")       
    571550                 
    572             inoutput.run_rproject = create_graphs
     551            inoutput.run_rproject = create_plots
    573552                       
    574553            if merge:
     
    579558                sys.exit(0)
    580559
    581             if annotate_file and gene_boundaries_file and snp_file:
     560            if annotate_file and gene_loc_file and snp_loc:
    582561                self._step0_annotate_genes(inoutput, annotate_file, up, down, \
    583                                             gene_boundaries_file, snp_file)
    584 
    585             elif ngenes or draw_selection:
    586                 common.getfile_handle(complete_gene_snp_mapping, "--all_genes is not set correctly.", verbose)
     562                                            gene_loc_file, snp_loc)
     563
     564            elif draw_ngenes or draw_selection:
     565                common.getfile_handle(complete_gene_snp_mapping, "--pool is not set correctly.", verbose)
    587566                common.getfile_handle(group, "--set is not set.", verbose)
    588567
     
    595574                    draw.setseed(seed)
    596575
    597                 if ngenes:
    598                     self._step4a_draw_genes(draw, ngenes, ndraws)
     576                if draw_ngenes:
     577                    self._step4a_draw_genes(draw, draw_ngenes, ndraws)
    599578               
    600579                if draw_selection:
  • trunk/src/jag/mergepermutations.py

    r20 r21  
    2929        get all permuted files and return it in a dict with as key phenotype and a list of filenames
    3030        """
    31         files = glob.glob(self.inout.out + "perm.*.out")   #get perm * files
    32                  
     31       
     32        files = glob.glob(self.inout.out + "*.perm")   #get perm *.perm
     33        pattern = re.compile(self.inout.out + ".{1,8}\.P(?P<pheno>.*)\.perm")
     34        files_clean = [x for x in files if pattern.search(x) != None]
     35           
    3336        if (len(files) < 1):
    34             log.info("Could not find any permutations named as " + self.inout.out + "perm.*.out")
     37            log.info("Could not find any permutations named as " + self.inout.out + "*.perm")
    3538            log.info(common.get_terminated_time())
    3639            sys.exit()
    3740           
    38         ordered_results = _get_ordered_files_from_list(files)
    39        
     41        ordered_results = _get_ordered_files_from_list(files_clean)
    4042        return (ordered_results)
    4143
     
    6062        """
    6163       
    62         sumlogfiles = glob.glob(self.inout.out + "sumlog.*.out")
    63         test = re.compile("sumlog\.P(?P<pheno>.*)\.out$")
     64        sumlogfiles = glob.glob(self.inout.out + "*.sumlog")
     65        test = re.compile("\.P(?P<pheno>.*)\.sumlog$")
    6466        files_clean = [x for x in sumlogfiles if test.search(x) != None]
    6567        aa_result = {}
     
    8385             
    8486        pheno_nr = 0
     87       
    8588        #merge each phenotype
    8689        for key in keys:
     
    8891            log.info("\nMerging " + (str(len(perm_files[key]))) + " permutation files for phenotype " + key + "...")
    8992            perm_out_string = ordered_results[key].format_permout() # concatenated results
    90             perm_filename = "perm.merged.P" + key + ".out"
     93            perm_filename = "merged.P" + key + ".perm"
    9194            perm_out_filename = self.inout.save_text_to_filename(perm_filename, perm_out_string)
    9295            log.info("Saved merged permutations as " + perm_out_filename)
     
    98101
    99102                empp_out_as_text = ordered_results[key].format_permutated_results(aa_object)
    100                 emp_filename = "empp.merged.P" + key + ".out"
     103                emp_filename = "merged.P" + key + ".empp"
    101104                empp_filename = self.inout.save_text_to_filename(emp_filename, empp_out_as_text)
    102105                log.info("Saved empirical pvalues as " + empp_filename)
     
    110113       
    111114            else:
    112                 log.info("\nWarning: Could not find sumlog file " + self.inout.out + "sumlog.P" + key + ".out")
     115                log.info("\nWarning: Could not find sumlog file " + self.inout.out + ".P" + key + ".sumlog")
    113116                log.info(common.get_terminated_time())
    114117                sys.exit()
     
    121124    """
    122125    # Skip 'merged' files, but accept manually entered seeds of any length
    123     test_merged = re.compile("perm\.merged\.P(?P<pheno>.*)\.out$")
     126    test_merged = re.compile("\.merged\.P(?P<pheno>.*)\.perm$")
    124127    files_clean = [x for x in files if test_merged.search(x) is  None]
    125128   
    126     #filter other none correct files
    127     test = re.compile("perm\..{1,8}\.P(?P<pheno>.*)\.out$")
     129    test = re.compile("\..{1,8}\.P(?P<pheno>.*)\.perm$")
    128130    files_clean = [x for x in files_clean if test.search(x) != None]
    129131    phenotypes = set([test.search(x).group("pheno") for x in files_clean])
     
    132134   
    133135    for phenotype in phenotypes:
    134         #print phenotype
    135         test_pheno = re.compile("perm\..{1,8}\.P" + str(phenotype) + "\.out$")
     136        test_pheno = re.compile("\..{1,8}\.P" + str(phenotype) + "\.perm$")
    136137        pheno_type_files = [x for x in files_clean if test_pheno.search(x) != None]
    137138        ordered_results[phenotype] = pheno_type_files
  • trunk/src/jag/permutation.py

    r20 r21  
    4141            self.plink.phenotypes_present = _get_aa_phenotypes_present(aa_results)
    4242        else:
    43             #create a list with number of phenotypes
    4443            aa_results = self.plink.get_number_of_pheno_types() * [False]
    4544            self.plink.phenotypes_present = range(1, len(aa_results) + 1)
     
    6463        nperm = int(nperm)
    6564       
    66         #set number of permutations per cpu
    6765        permperjob = int(50)
    68         # When a covar file is set , multiple phenotypes can not be run at once
    69         #because the covar is coupled by phenotype.this is a hack to prevent
    70         #multiple  phenotypes are ran at once
     66
    7167        if self.plink.covar_file:
    7268            permperjob = 1
     
    7975        for key, perm_score_per_group in results_merged.iteritems():
    8076            perm_out_string = perm_score_per_group.format_permout()
    81             #print(perm_out_string)
    82             perm_out_filename = "perm." + str(random_seed) + ".P" + str(key) + ".out"
     77            perm_out_filename = str(random_seed) + ".P" + str(key) + ".perm"
    8378            #save permutation score as table with random seed in filename
    8479            perm_filename = self.in_out.save_text_to_filename(perm_out_filename, perm_out_string)
    8580            log.info("\nSaved permutation results as " + perm_filename)
    86             #make sure key is a string for concatenating
    8781            key = str(key)
    8882
     
    9892                empp_out_text = perm_score_per_group.format_permutated_results(current_aa_results)
    9993
    100                 emp_filename = "empp.P" + key + ".out"
     94                emp_filename = "P" + key + ".empp"
    10195                emp_filename = self.in_out.save_text_to_filename(emp_filename, empp_out_text)
    10296                log.info("Saved empirical pvalues as " + emp_filename)
    103                 #save created files
    10497                self.files[str(key)] ["empp"] = emp_filename
    10598               
    106                 #pheno_nr = "P" + key
    10799                if self.in_out.run_rproject:
    108100                    import jag.plot_with_r as plot_with_r
     
    153145               
    154146                scores = fg_cluster.permutated_scores[groupname]
    155                 #print("Scores:", str(scores))
    156147                perm_per_pheno[pheno_nr].add_results(groupname, scores)
    157148   
  • trunk/src/jag/permutationresults.py

    r20 r21  
    9999        groupnames = self.permutated_scores.keys()
    100100        groupnames.sort(key=common.alpha_sort)
    101         #print(groupnames)
    102101        length_permutations = [len(self.permutated_scores[g]) for g in groupnames]
    103102       
     
    113112            log.info(common.get_terminated_time())
    114113            exit()
    115         #check length of each key is equal
    116114        return(formated_results)
    117115
     
    127125        message += "\tvar(Perms)\tmean(Perms)\tnEff\n"
    128126        for groupname in keys:
    129         #for groupname in self.permutated_scores.iterkeys():
    130127            sum_neg_log = aa_result.get_sum_neglog_10_score(groupname)
    131128            n_snp = aa_result.get_n_snps(groupname)
     
    194191    text_as_list = file_handle.readline()
    195192    header = text_as_list.strip().split("\t")
    196     #create empty list of list to store columns
    197193    results = [[] for i in range(len(header))]
    198194    for line in file_handle:
    199195        splitted_line = line.strip().split("\t")
    200         #add group name from first column
    201196        results[0].append(splitted_line.pop(0))
    202197        for i , value in enumerate(splitted_line, 1):
  • trunk/src/jag/plink.py

    r20 r21  
    8282                        permutation_completed = str(self.permutations_completed + perm_this_run)
    8383                        log.info("permutation " + permutation_completed + " ready")
    84                 #else:
    85                 #    log.info("Analysing" + str(nr_of_phenotypes))
    86                     #log.info("Ready with phenotype" + str(column_number))
    8784               
    8885            output_of_plink += str(console_out) + "\n"
     
    9693        """
    9794        outfile = tempfile.mkstemp()
    98         # select right function of plink
    99         #command = _find_os_right_plink()
    100         #print(command)
    101        
    102         # for the use of a non-basic association test / remove assoc option when logistic or lineaer are set
    103         # logistic test
     95
     96        # select logistic test
    10497        if (self.switches.count("--logistic") >= 1):
    10598            self.arguments = self.arguments.replace("--assoc", "")
     
    108101                self.arguments = self.arguments.replace("--adjust", "")
    109102       
    110         #linear test
     103        # select linear test
    111104        if (self.switches.count("--linear") >= 1):
    112105            self.arguments = self.arguments.replace("--assoc", "")
     
    117110
    118111        command = "plink --noweb " + self.arguments + self.switches + " --out " + outfile[1]
    119              
    120         #prefix = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
    121         #print("SELF ARGUMENTS" + self.arguments)
    122         #print("SELF SWITCHES" + self.switches) 
     112     
    123113        process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    124114       
     
    145135       
    146136        if (self.verbose):
    147             #print("SAY HELLO!!")
    148137            log.info(output_of_plink)
    149138        #check if plink is finished correctly by checking the output of plink "Analysis finished"
     
    161150        """
    162151        resultfiles = {}
    163         amount_of_phenotypes = len(self.phenotypes_present)
    164         #print("amount_of_phenotypes"+str(amount_of_phenotypes))
     152        nr_of_phenotypes = len(self.phenotypes_present)
     153
    165154        for phenotype in self.phenotypes_present:
    166155            phenotype=int(phenotype)
    167156            resultfiles[phenotype] = []
    168             range_until=(len(seeds)*amount_of_phenotypes)
     157            range_until=(len(seeds)*nr_of_phenotypes)
    169158            #the second number in range excludes the range : it does not includes the border
    170             if (phenotype==amount_of_phenotypes):
     159            if (phenotype==nr_of_phenotypes):
    171160                range_until+=1
    172161         
    173             numbers=range(phenotype,range_until,amount_of_phenotypes)
     162            numbers=range(phenotype,range_until,nr_of_phenotypes)
    174163            resultfiles[phenotype]= [resultfile+".P"+str(number)+"."+results_extension for number in numbers]
    175164           
     
    211200       
    212201        for seeds in jobmap:
    213             #print(seeds)
    214202            permutations.append(self.run_permutation(seeds))
    215203            self.permutations_completed += len(seeds)
     
    229217        for resultfile in files:
    230218            filehandle = common.getfile_handle(resultfile)
    231            
    232            
     219                       
    233220            for line in filehandle:
    234221                splitted = line.split()
    235222                snp_id = splitted[1]
    236223                p_value = splitted[8]
    237                 
     224               
    238225                if(p_value == "NA"):
    239                     endtime = time.asctime(time.localtime(time.time()))
    240                     print("\n* P-values with a 'NA' value are detected.\nSee FAQ of manual for more information." )
    241                     # or a SNP that is genotyped only in cases or controls (and should be deleted from your data)
    242                     sys.exit("\nTerminated analysis: " + endtime + "\n")
    243                
     226                    p_value = 1
     227
    244228                if (snp_id in snp_to_group_map):
    245229                    clusterresults.add_snp_to_cluster(p_value, snp_to_group_map[snp_id])
     
    252236    return permuted_results
    253237
    254 
    255 def _find_os_right_plink():
    256     """
    257     Give the right path to plink depending on OS
    258     """
    259     command = ""
    260     if(sys.platform == "linux2"):
    261         bits = platform.architecture()[0]
    262         if (bits == "64bit"):
    263             command = "/../bin/plink-1.07-x86_64/plink"
    264         elif(bits == "32bit"):
    265             sys.exit("32 bit version of linux not supported")
    266         else:
    267             sys.exit("unknown version of linux")
    268     elif(sys.platform == "darwin"):
    269         command = "/../bin/plink-1.07-mac-intel/plink"
    270     elif(sys.platform == "win32"):
    271         sys.exit("Windows not supported")
    272 
    273     return command
    274238
    275239
  • trunk/src/jag/plot_with_r.py

    r20 r21  
    1717        '''Initialize Object to call R, with argument inout to
    1818        direct produced files to correct output folder.'''
    19         #IO handle
    20         self.inout = inout
     19        self.inout = inout        #IO handle
    2120
    2221        #Find R binary using 'which'
  • trunk/src/jag/snp2geneset.py

    r20 r21  
    2727            snp_data = line.strip().split("\t")
    2828            current_chr = snp_data[1]
    29            
    30             #check chromosome is present in list of needed chromosomes
     29 
    3130            if not current_chr in all_chromosomes:               
    3231                if not old_chr == current_chr:
     
    3635               
    3736            else:
    38                 #print message when new chromosome is found
    3937                if not old_chr == current_chr:
    4038                    log.info("Mapping SNPs to genes on chromosome " + str(current_chr))
Note: See TracChangeset for help on using the changeset viewer.