source: trunk/medline-importer/README @ 2

Last change on this file since 2 was 2, checked in by rob.hooft@…, 6 years ago

added source code and subproject directory structure

File size: 3.8 KB
Line 
1Instructions to update the database (manually):
2
3Step 1: Update the data files. New data normally comes out Tuesday through
4Saturday at 18:00. On some days there is more than one data file.
5
6* sudo -u med-import ./get-data
7
8Step 2: Parse and import.
9
10* sudo -u med-import ./run-medline-import &
11* Monitor log/nohup.log. Each xml.gz file should take about 2-30 minutes;
12  it may take up to a minute before the first update ("500 done") appears.
13  There are up to 30000 citations in a single file. Processing them goes
14  much faster towards the end of each file.
15* Once the import is done: sudo rm data/*xml.gz   or    sudo mv data/*xml.gz olddata/
16
17Step 3: Update the PMID-to-DOI mapping.
18* Update the pmid2doi_continuous mapping database; see /opt/pmid2doi-updater
19
20-----
21See also: https://trac.nbic.nl/nbiceng/ticket/32
22Source instructions:
23 * The source code comes from ErasmusMC
24   (svn+ssh://mi-biosemantiek1.erasmusmc.nl/home/svn/biosemantic/Martijns Research/MedlineImport):
25      org/erasmusmc/medlineImport/GeneratePMID2XMLFile.java
26      org/erasmusmc/medlineImport/ImportMasterScript.java
27      org/erasmusmc/medlineImport/util/BioTextDBConnection.java
28      org/erasmusmc/medlineImport/util/ObjectExchange.java
29      org/erasmusmc/medlineImport/xmlParsers/GenericXMLParser.java
30      org/erasmusmc/medlineImport/xmlParsers/medline/AbstractText.java
31      org/erasmusmc/medlineImport/xmlParsers/medline/ArticleLanguage.java
32      org/erasmusmc/medlineImport/xmlParsers/medline/ArticlePublicationType.java
33      org/erasmusmc/medlineImport/xmlParsers/medline/Author.java
34      org/erasmusmc/medlineImport/xmlParsers/medline/Chemical.java
35      org/erasmusmc/medlineImport/xmlParsers/medline/CitationSubset.java
36      org/erasmusmc/medlineImport/xmlParsers/medline/CommentsCorrections.java
37      org/erasmusmc/medlineImport/xmlParsers/medline/DataBank.java
38      org/erasmusmc/medlineImport/xmlParsers/medline/GeneralNote.java
39      org/erasmusmc/medlineImport/xmlParsers/medline/GeneSymbol.java
40      org/erasmusmc/medlineImport/xmlParsers/medline/Grant.java
41      org/erasmusmc/medlineImport/xmlParsers/medline/Investigator.java
42      org/erasmusmc/medlineImport/xmlParsers/medline/Keyword.java
43      org/erasmusmc/medlineImport/xmlParsers/medline/MedlineCitation.java
44      org/erasmusmc/medlineImport/xmlParsers/medline/MedlineParser.java
45      org/erasmusmc/medlineImport/xmlParsers/medline/MeshHeading.java
46      org/erasmusmc/medlineImport/xmlParsers/medline/MeshHeadingQualifier.java
47      org/erasmusmc/medlineImport/xmlParsers/medline/OtherAbstract.java
48      org/erasmusmc/medlineImport/xmlParsers/medline/OtherID.java
49      org/erasmusmc/medlineImport/xmlParsers/medline/PersonalNameSubject.java
50      org/erasmusmc/medlineImport/xmlParsers/medline/SpaceFlightMission.java
51      org/erasmusmc/medlineImport/xmlParsers/medline/SupplMeshHeading.java
52      org/erasmusmc/medlineImport/xmlParsers/NodeHandler.java
53 * It needs a few utilities from this package:
54      org/erasmusmc/utilities/AbstractNormaliser.java
55      org/erasmusmc/utilities/BinaryFileUtilities.java
56      org/erasmusmc/utilities/DateUtilities.java
57      org/erasmusmc/utilities/DirectoryUtilities.java
58      org/erasmusmc/utilities/IntegerUtilities.java
59      org/erasmusmc/utilities/LinuxUtilities.java
60      org/erasmusmc/utilities/ObjectUtilities.java
61      org/erasmusmc/utilities/RandomUtilities.java
62      org/erasmusmc/utilities/ReadCSVFile.java
63      org/erasmusmc/utilities/ReadCSVFileWithHeader.java
64      org/erasmusmc/utilities/ReadEncryptedFile.java
65      org/erasmusmc/utilities/ReadTextFile.java
66      org/erasmusmc/utilities/Row.java
67      org/erasmusmc/utilities/Score.java
68 * And a "standard library" for database access (the MySQL JDBC driver):
69      lib/mysql-connector-java-5.1.23.jar
70
71 * It can be compiled simply with "javac". The main program is ImportMasterScript.java.
72      % cd src
73      % javac -Xlint:unchecked org/erasmusmc/medlineImport/ImportMasterScript.java
74
Note: See TracBrowser for help on using the repository browser.