source: omimparser.pl

Last change on this file was 10, checked in by maarten, 9 years ago

reference

File size: 5.1 KB
Line 
1
2
3use lib "../Bioperl-patch";
4
5use Bio::Phenotype::OMIM::OMIMparser;
6use strict;
7use warnings;
8use lib::Omim2Pubmed;
9use lib::Omim2dbsnp;
10
11use Data::Dumper;
12use lib::ProccesAllicVariants;
13
14        use Carp;
15
# Usage: omimparser.pl <omim.txt>
#
# Walks every entry of an OMIM text dump and, for each entry that has at
# least one allelic variant, writes whatever
# ProccesAllicVariants->proccesAllicVariants() returns to
# output/<name>.txt.  <name> comes from ProccesAllicVariants::getfilename()
# and falls back to the entry's MIM number when that yields nothing.
# Name collisions are reported in output/filename_errors.txt.
#
# The OMIM database is available as textfile at:
#   ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z
# The genemap is available as textfile at:
#   ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap
#
# The parser can also be given a genemap (not used by this script):
#   $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap  => "/path/to/genemap",
#                                                         -omimtext => "/path/to/omim.txt" );

my $filename = $ARGV[0];

# Fail with a usage hint instead of running -e on an undefined value.
if ( !defined $filename ) {
    die("\nusage: $0 <omim.txt>");
}
if ( !-e $filename ) {
    die("\nfile $filename not found");
}

# Cross-reference tables handed to the allelic-variant processor below.
my $pubmedcited = "data/pubmed_cited";
my $omim2pubmed = Omim2Pubmed->new();
$omim2pubmed->loadpubmed_cited($pubmedcited);

my $dbsnpfile = "data/dbsnp-to-OMIM.cvs";
my $dbsnp     = Omim2dbsnp->new();
$dbsnp->loadCrossReference($dbsnpfile);

my $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -omimtext => $filename );

my $PAV = ProccesAllicVariants->new();
$PAV->addOmim2pubmed($omim2pubmed);
$PAV->addOmim2dbsnp($dbsnp);

# Log of output-file name collisions; truncated on every run.
my $error_log_path = "output/filename_errors.txt";
open( my $error_log, '>', $error_log_path )
    or die "cannot open $error_log_path for writing: $!";

while ( my $omim_entry = $omim_parser->next_phenotype() ) {

    # To dump a whole entry while debugging:
    #   print( $omim_entry->to_string() );

    # Individual fields used by this script.
    my $numb  = $omim_entry->MIM_number();                        # *FIELD* NO
    my $title = $omim_entry->title();                             # *FIELD* TI - first line
    my $alt   = $omim_entry->alternative_titles_and_symbols();    # *FIELD* TI - additional lines

    # Further OMIMentry accessors, kept here as a reference (unused):
    #   $omim_entry->more_than_two_genes();      # "#" before title
    #   $omim_entry->is_separate();              # "*" before title
    #   $omim_entry->description();              # *FIELD* TX
    #   $omim_entry->mapping_method();           # from genemap
    #   $omim_entry->gene_status();              # from genemap
    #   $omim_entry->created();                  # *FIELD* CD
    #   $omim_entry->contributors();             # *FIELD* CN
    #   $omim_entry->edited();                   # *FIELD* ED
    #   $omim_entry->additional_references();    # *FIELD* SA
    #   $omim_entry->clinical_symptoms_raw();    # *FIELD* CS
    #   $omim_entry->comment();                  # from genemap
    #   $omim_entry->miniMIM();                  # *FIELD* MN
    #     A Bio::Phenotype::OMIM::MiniMIMentry object providing
    #     description(), created(), contributors(), edited().
    #     (Most OMIM entries do not have MINI MIM entries, though.)
    #   $omim_entry->each_Correlate();           # from genemap
    #     Array of Bio::Phenotype::Correlate objects providing
    #     name(), description() (not used), species() (always mouse),
    #     type() ("OMIM mouse correlate"), comment().
    #   $omim_entry->each_CytoPosition();        # from genemap
    #     Array of Bio::Map::CytoPosition objects.
    #   $omim_entry->each_gene_symbol();         # from genemap
    #     Array of strings.

    # Array of Bio::Annotation::Reference objects.
    my @refs = $omim_entry->each_Reference();                     # *FIELD* RF

    # Array of Bio::Phenotype::OMIM::OMIMentryAllelicVariant objects.
    # class Bio::Phenotype::OMIM::OMIMentryAllelicVariant
    # provides the following:
    # - number (e.g ".0001" )
    # - title (e.g "ALCOHOL INTOLERANCE" )
    # - symbol (e.g "ALDH2*2" )
    # - description (e.g "The ALDH2*2-encoded protein has a change ..." )
    # - aa_ori  (used if information in the form "LYS123ARG" is found)
    # - aa_mut (used if information in the form "LYS123ARG" is found)
    # - position (used if information in the form "LYS123ARG" is found)
    # - additional_mutations (used for e.g. "1-BP DEL, 911T")
    my @avs = $omim_entry->each_AllelicVariant();                 # *FIELD* AV

    # Only entries with allelic variants produce an output file.
    next if !@avs;

    my $geneid = ProccesAllicVariants::getfilename( $title, $alt );

    # No usable name: always fall back to the MIM number.  The fallback
    # used to be skipped when output/<MIM>.txt already existed (e.g. left
    # over from an earlier run), which sent the entry to "output/.txt".
    # NOTE(review): assumes MIM numbers are unique per entry - confirm.
    if ( !defined $geneid || $geneid eq "" ) {
        $geneid = $numb;
    }

    # Check for a collision only after the final name is known, so the
    # warning refers to the file that is actually about to be written.
    # The file is still overwritten: a rerun legitimately rewrites it.
    my $output_path = "output/" . $geneid . ".txt";
    if ( -e $output_path ) {
        print {$error_log} ("\nWARNING FILENAME ALREADY EXITS  with name: $geneid and $numb\n $title \n $alt");
    }

    open( my $csv, '>', $output_path )
        or die "cannot open $output_path for writing: $!";
    print {$csv} ( $PAV->proccesAllicVariants( \@avs, \@refs, $numb ) );

    # Buffered write errors only surface at close, so check it.
    close($csv) or die "cannot close $output_path: $!";
}

close($error_log) or die "cannot close $error_log_path: $!";
Note: See TracBrowser for help on using the repository browser.