1 | package nl.tno.massSequencing.classification |
---|
2 | |
---|
3 | class Taxon { |
---|
// Session factory used by appendTo() to flush and clear the current session.
// NOTE(review): nothing in this class assigns it, so it is presumably injected by the framework - confirm.
def sessionFactory
---|
5 | |
---|
/**
 * Named taxonomy levels. They exist purely for reference, so that code referring
 * to a specific level does not have to use magic numbers.
 *
 * @author robert
 */
enum Level {
    DOMAIN( 0, "Domain" ),
    KINGDOM( 1, "Kingdom" ),
    PHYLUM( 2, "Phylum" ),
    CLASS( 3, "Class" ),
    ORDER( 4, "Order" ),
    FAMILY( 5, "Family" ),
    GENUS( 6, "Genus" ),
    SPECIES( 7, "Species" )

    // Numeric value of the level, as stored in Taxon.level
    private int levelNumber

    // Human readable name of the level
    private String description

    private Level( int number, String description ) {
        this.levelNumber = number
        this.description = description
    }

    /** @return the numeric value of this level */
    public int number() {
        return this.levelNumber
    }

    /** @return the human readable name of this level */
    public String description() {
        return this.description
    }
}
---|
25 | |
---|
// Level of this taxon in the taxonomy. A plain int is used instead of the Level enum,
// because the level can be up to 12 in the future; any number can be stored here.
// The default of -1 means 'not set': appendTo() and addAsRoot() only fill in a level
// themselves when the current value is not greater than -1.
int level = -1

// Name of the taxon
String name

// Left and right indices that describe the position of this taxon in the hierarchy
// @see http://dev.mysql.com/tech-resources/articles/hierarchical-data.html
int lft
int rgt

static constraints = {}

static mapping = {
    table 'taxonomy'

    // Separate indices for the two hierarchy columns
    lft index: 'lft_idx'
    rgt index: 'rgt_idx'

    // level and name are given the same index name
    level index: 'level_name_idx'
    name index: 'level_name_idx'
}
---|
48 | |
---|
/**
 * Looks up the direct parent of this taxon: the taxon one level up whose
 * lft/rgt range encloses the range of this taxon.
 *
 * @return	The parent taxon of the current taxon, or null if no parent exists
 */
public Taxon giveParent() {
    // A taxon at level 0 is a root taxon, which has no parent
    if( level == 0 ) {
        return null
    }

    def hql = "FROM Taxon t WHERE t.lft < :currentLeft AND t.rgt > :currentRgt AND t.level = :parentLevel"
    def queryParams = [ 'currentLeft': lft, 'currentRgt': rgt, 'parentLevel': level - 1 ]

    return Taxon.find( hql, queryParams )
}
---|
60 | |
---|
/**
 * Returns the taxa on the path through the tree that leads to this taxon,
 * ordered by their lft index. The current taxon is part of the list.
 *
 * @return	List of taxa; for a taxon at level 0 the list only contains the taxon itself
 */
public List givePath() {
    if( level != 0 ) {
        // Every taxon whose lft/rgt range contains the lft index of this taxon is on the path
        def hql = "FROM Taxon parent WHERE parent.lft <= :nodeLft AND parent.rgt >= :nodeLft ORDER BY parent.lft"
        return Taxon.findAll( hql, [ 'nodeLft' : this.lft ] )
    }

    // A taxon at level 0 is its own path, no query needed
    return [ this ]
}
---|
73 | |
---|
/**
 * Returns the names of the taxa on the path through the tree that leads to this
 * taxon, ordered by their lft index. The name of the current taxon is part of the list.
 *
 * @return	List of names; for a taxon at level 0 the list only contains its own name
 */
public List givePathNames() {
    if( level != 0 ) {
        // Same selection as givePath(), but only the names are fetched
        def hql = "SELECT parent.name FROM Taxon parent WHERE parent.lft <= :nodeLft AND parent.rgt >= :nodeLft ORDER BY parent.lft"
        return Taxon.executeQuery( hql, [ 'nodeLft' : this.lft ] )
    }

    // A taxon at level 0 is its own path, no query needed
    return [ this.name ]
}
---|
86 | |
---|
/**
 * Inserts the current taxon into the tree as a child of the given parent taxon.
 * The current taxon is saved immediately.
 *
 * The lft and rgt properties of this taxon are overwritten. If no level has been set
 * (the level is not greater than -1), the level is set to the parent level + 1.
 *
 * The lft and rgt values of other taxa are changed by bulk updates and the current
 * session is flushed and cleared afterwards, so make sure to refresh all taxon
 * objects after adding.
 *
 * @param parentTaxon	Taxon to append this taxon to. If no parent is given, the taxon is added as root taxon
 * @return	true if the taxon has been added. Failures are reported by an exception, not by a return value
 * @throws Exception	if updating the tree or saving this taxon fails; the transaction is marked for rollback
 */
public boolean appendTo( Taxon parentTaxon ) {
    // Without a parent, the taxon becomes a root taxon
    if( !parentTaxon ) {
        return addAsRoot()
    }

    // Run all changes in a single transaction, so everything is rolled back if something fails
    Taxon.withTransaction { status ->
        // The new taxon takes the position that the right index of the parent currently has
        def insertAt = parentTaxon.rgt

        try {
            // Make room for the new taxon: shift all indices from the insert position on 2 to the right
            Taxon.executeUpdate( "UPDATE Taxon t SET t.rgt = t.rgt + 2 WHERE t.rgt >= ?", [ insertAt ] )
            Taxon.executeUpdate( "UPDATE Taxon t SET t.lft = t.lft + 2 WHERE t.lft >= ?", [ insertAt ] )

            // Put the current taxon in the gap that has just been created
            this.lft = insertAt
            this.rgt = insertAt + 1

            // Only derive the level from the parent if no level has been set
            if( this.level <= -1 ) {
                this.level = parentTaxon.level + 1
            }

            // Save this taxon in the database right away
            this.save( failOnError: true )

            // The update statements changed rows behind the back of the session, so the
            // session is flushed and cleared; otherwise the objects don't match the database anymore
            def session = sessionFactory.getCurrentSession()
            session.flush()
            session.clear()
        } catch( Exception e ) {
            // If an error occurs, roll back the whole transaction
            status.setRollbackOnly()

            throw new Exception( "Error occurred while adding taxon " + this, e )
        }
    }

    return true
}
---|
135 | |
---|
/**
 * Appends the current taxon to the tree as a root taxon. The taxon is saved.
 *
 * The taxon is placed to the right of everything that is already stored: its lft and rgt
 * properties are set to the current maximum rgt value + 1 and + 2 (or 1 and 2 if there is
 * no maximum yet). If no level has been set (the level is not greater than -1), the level
 * is set to 0.
 *
 * @return	true if the taxon has been added. Failures are reported by an exception, not by a return value
 * @throws Exception	if saving this taxon fails
 */
public boolean addAsRoot() {
    // Determine the maximum 'right' value that is currently in use
    def results = Taxon.executeQuery( "SELECT max(t.rgt) FROM Taxon t" )
    def previousRgt = results && results[ 0 ] ? results[ 0 ] : 0

    try {
        // Put the current taxon directly after the last taxon
        this.lft = previousRgt + 1
        this.rgt = previousRgt + 2

        // Set the current taxon level, if needed
        this.level = this.level > -1 ? this.level : 0

        // Save this taxon in the database right away
        this.save( failOnError: true )
    } catch( Exception e ) {
        throw new Exception( "Error occurred while adding taxon " + this, e )
    }

    // Explicit result: previously the method had no return statement and the declared
    // boolean depended on an implicit return of the save() result
    return true
}
---|
163 | |
---|
/**
 * Retrieves a taxon from the database based on the path of names given.
 *
 * All taxa with the name of the last path entry and the level of that entry are fetched.
 * The first one whose stored path names (see givePathNames()) are equal to the entries
 * of the given path, compared position by position, is returned.
 *
 * @param path			An array or list with the names of the taxa in the path, ordered by level
 * @param startLevel	Which level is the first entry in the list. Defaults to zero. Can be used in order to find taxa
 * 						without the whole tree being specified (e.g. without the root element)
 * @param taxonCache	Cache as created by emptyTaxonCache(). It is consulted before the database is queried
 * 						and a taxon that is found is stored in it. May be null, in which case no caching is done
 * @return	First taxon that is found and matches the criteria or null if nothing is found
 */
public static Taxon findTaxonByPath( def path, int startLevel = 0, taxonCache ) {
    if( path == null || path.size() == 0 )
        return null

    // The cache is optional: the cache helpers index into the cache object,
    // so they must not be called without one
    if( taxonCache != null ) {
        def cacheTaxon = findTaxonInCache( taxonCache, path )
        if( cacheTaxon )
            return cacheTaxon
    }

    def leafLevel = path.size() - 1 + startLevel
    def leafName = path[ -1 ]

    // Find all taxa that match the given leaf node
    def leafs = Taxon.findAll(
        "FROM Taxon t WHERE t.level = :level AND t.name = :name",
        [ 'name': leafName, 'level': leafLevel ]
    )

    // If none is found, return null
    if( !leafs || !leafs[0] )
        return null

    // If one or more leafs are found, return the first one with the correct path
    findLeafs:		// Label in order to continue this loop from within the inner loop
    for( leaf in leafs ) {
        def leafPath = leaf.givePathNames()
        def numLeafs = leafPath.size()

        // NOTE(review): entries are compared index for index from the start of both lists,
        // so this only lines up if the stored path of the leaf starts at the same taxon as
        // the given path - confirm this holds for startLevel > 0
        for( def i = 0; i < numLeafs; i++ ) {
            if( leafPath[ i ] != path[ i ] ) {
                continue findLeafs
            }
        }

        if( taxonCache != null )
            storeTaxonInCache( taxonCache, path, leaf )

        return leaf
    }

    return null
}
---|
217 | |
---|
/**
 * Creates a new taxon cache without any entries, to be filled by storeTaxonInCache().
 *
 * @return	Map with an empty map under each of the keys "top" and "sub"
 */
public static emptyTaxonCache() {
    def cache = [:]
    cache[ "top" ] = [:]
    cache[ "sub" ] = [:]

    return cache
}
---|
221 | |
---|
/**
 * Looks up the taxon for the given path in a cache that has been filled by storeTaxonInCache().
 *
 * @param cache	Cache (see emptyTaxonCache()) to search in
 * @param path	Names of the taxa in the path
 * @return	The cached taxon, or null if the path is not in the cache
 */
protected static Taxon findTaxonInCache( cache, path ) {
    // Paths of at most three entries are stored directly in the "top" map
    if( path.size() <= 3 )
        return cache[ "top" ][ cacheKey( path ) ]

    // Longer paths are stored in the nested cache that belongs to their first three entries
    def subCache = cache[ "sub" ][ cacheKey( path[0..2] ) ]
    if( !subCache )
        return null

    // Continue in the nested cache with the rest of the path
    return findTaxonInCache( subCache, path[ 3..-1] )
}
---|
234 | |
---|
/**
 * Stores a taxon in the cache under the given path.
 *
 * The cache is 'layered': paths of at most three entries are kept in the "top" map of
 * the cache. Longer paths are kept in a nested cache, one for each combination of the
 * first three entries, so each map will have a size that we can still deal with:
 *
 * 	[
 * 		"top": [ "bacteria": x, "bacteria;firmicutes": y ],
 * 		"sub": [ "bacteria;firmicutes;abc":
 * 					[ "top": [ "def;ghi" : z, "def;gkl": m ], "sub": [:] ]
 * 		]
 * 	]
 *
 * @param cache	Cache (see emptyTaxonCache()) to store the taxon in
 * @param path	Names of the taxa in the path
 * @param taxon	Taxon to store
 */
protected static void storeTaxonInCache( cache, path, taxon ) {
    if( path.size() > 3 ) {
        def subCaches = cache[ "sub" ]
        def prefixKey = cacheKey( path[0..2] )

        // Create the nested cache for these first three entries on first use
        if( subCaches[ prefixKey ] == null ) {
            subCaches[ prefixKey ] = emptyTaxonCache()
        }

        // Store the taxon in the nested cache, under the rest of the path
        storeTaxonInCache( subCaches[ prefixKey ], path[3..-1], taxon )
        return
    }

    // Short path: store it directly in this cache
    cache[ "top" ][ cacheKey( path ) ] = taxon
}
---|
258 | |
---|
/**
 * Builds the key under which a path is kept in a cache map.
 *
 * @param path	Names of the taxa in the path
 * @return	The path entries joined by ";"
 */
protected static cacheKey( path ) {
    def key = path.join( ";" )

    return key
}
---|
262 | |
---|
/**
 * Retrieves a taxon from the database based on the path of names given. If no taxon matches the criteria,
 * a new taxon is created (and the missing other parts of the path are created as well).
 *
 * @param path			An array or list with the names of the taxa in the path, ordered by level
 * @param startLevel	Which level is the first entry in the list. Defaults to zero. Can be used in order to find taxa
 * 						without the whole tree being specified (e.g. without the root element)
 * @param taxonCache	Cache with taxa found before, as created by emptyTaxonCache(). If not given or null,
 * 						a new cache is used for the duration of this call
 * @return	First taxon that is found and matches the criteria, or a new taxon if it didn't exist.
 * 			Null if the path is null or empty
 */
static Taxon findOrCreateTaxonByPath( def path, int startLevel = 0, def taxonCache = null ) {
    // Nothing to find or create without path entries
    if( path == null || path.size() == 0 )
        return null

    // The lookups below index into the cache object, which fails on the default value null.
    // Fall back to a fresh cache in that case
    if( taxonCache == null )
        taxonCache = emptyTaxonCache()

    def taxon = findTaxonByPath( path, startLevel, taxonCache )

    if( taxon )
        return taxon

    def depth = path.size()
    def levelFound = -1
    def parentNode

    // The taxon doesn't exist yet. Find the deepest part of the path that does exist.
    // level contains the index of the taxon in the path, so the leaf is at depth - 1.
    // The leaf itself has just been checked, so we start at depth - 2
    for( def level = depth - 2; level >= 0; level-- ) {
        parentNode = findTaxonByPath( path[ 0 .. level ], startLevel, taxonCache )

        // The first taxon that is found is the deepest existing one
        if( parentNode ) {
            levelFound = level
            break
        }
    }

    // Create taxa from the entry after levelFound up to the leaf, each one
    // as a child of the previous one
    for( def level = levelFound + 1; level < depth; level++ ) {
        parentNode = Taxon.createTaxon( path[ level ], level + startLevel, parentNode )
    }

    // parentNode now is the leaf node
    return parentNode
}
---|
306 | |
---|
/**
 * Searches the database for a taxon with the given name, level and parent. If it exists, it is returned.
 * Otherwise it is created and the newly created object is returned.
 *
 * @param name		Name of the taxon to find or create
 * @param level		Level of the taxon
 * @param parent	Parent of the taxon
 * @return	The taxon object
 */
public static Taxon findOrCreateTaxon( String name, int level, Taxon parent ) {
    // Only create a taxon if the lookup doesn't yield one
    return findTaxon( name, level, parent ) ?: createTaxon( name, level, parent )
}
---|
324 | |
---|
/**
 * Searches the database for a taxon with the given name, level and parent. If it doesn't exist,
 * null is returned.
 *
 * @param name		Name of the taxon to find
 * @param level		Level of the taxon
 * @param parent	Parent of the taxon; only taxa whose lft/rgt range lies within the range of the parent match.
 * 					If not given or null, the first taxon with the given name and level is returned.
 * @return	The taxon object or null if it doesn't exist
 */
public static Taxon findTaxon( String name, int level, Taxon parent = null ) {
    // Without a parent, name and level are the only criteria
    if( !parent ) {
        return Taxon.find(
            "FROM Taxon t WHERE t.level = :level AND t.name = :name",
            [ 'name': name, 'level': level ]
        )
    }

    // With a parent, the taxon also has to be located inside the parent
    def queryParams = [ 'name': name, 'level': level, 'parentLeft': parent.lft, 'parentRgt': parent.rgt ]

    return Taxon.find(
        "FROM Taxon t WHERE t.level = :level AND t.name = :name AND t.lft > :parentLeft AND t.rgt < :parentRgt",
        queryParams
    )
}
---|
347 | |
---|
/**
 * Creates a taxon with the given name, level and parent. The method creates a new taxon
 * without checking whether it already exists.
 *
 * @param name		Name of the taxon to create
 * @param level		Level of the taxon
 * @param parent	Parent of the taxon. The new taxon is appended to it using appendTo()
 * @return	The newly created taxon object
 */
public static Taxon createTaxon( String name, int level, Taxon parent = null ) {
    def created = new Taxon( name: name, level: level )

    // Appending the taxon to the tree also saves it
    created.appendTo( parent )

    return created
}
---|
363 | |
---|
/**
 * Retrieves the descriptions of a range of taxonomy levels.
 *
 * @param minLevel	First level to include. Defaults to 1
 * @param maxLevel	Last level to include. Defaults to 6
 * @return	Map with the level numbers from minLevel up to and including maxLevel as keys and
 * 			the description of the matching Level as values (null for numbers without a Level)
 */
public static Map retrieveLevelNames( def minLevel = 1, def maxLevel = 6 ) {
    def namesByLevel = [:]

    minLevel.upto( maxLevel ) { levelNumber ->
        // Look up the Level that carries this number, if there is one
        def match = Taxon.Level.find { candidate -> candidate.number() == levelNumber }

        namesByLevel[ levelNumber ] = match?.description()
    }

    return namesByLevel
}
---|
373 | |
---|
374 | } |
---|