1
2
3 """
4 Toolbox for creating and maintaining a radio-data database.
5
6 Author: R. Lombaert
7
8 """
9
10 import os, re
11 from glob import glob
12
13 import cc.path
14 from cc.tools.io import DataIO
15 from cc.tools.io.Database import Database
16 from cc.data import LPTools
17
18
20
21 """
22 Environment with several tools to load and save a database for Radio data.
23
24 """
25
26 - def __init__(self,db_name='radio_data.db',db_path=None):
27
28 """
29 Initializing an instance of Radio.
30
31 Inherits from the Database class and adds a bit of functionality for
32 the particular case of radio data.
33
34 Initializing this object returns a Database() class that can contain
35 radio data in the given path. If no db is already present, a new one is
36 created from scratch at given location.
37
38 The name of the database is radio_data.db by default. Can be changed if
39 needed, but ComboCode does not work with this other name.
40
41 If nothing more is requested, a new database is essentially empty.
42
43 The option to parse the content of the target folder is available: This
44 will add entries to the database for the stars in the folder and the
45 'simple' molecules and transitions.
46
47 Once a db is created, you can perform several methods on the database
48 specific to radio data management.
49
50 Examples:
51 For data located in dradio in Path.dat
52 >>> from cc.data import Radio
53 >>> db = Radio.Radio()
54 >>> db.sync()
55
56 This will create a database, automatically search your data folder for
57 data files that the script can recognize, and save them in the
58 database. The final command ( .sync() ) will then save the database to
59 your hard disk in the same folder, under 'radio_data.db'. Later when
60 you run Radio.Radio() again, it will load that same database that still
61 contains all those datafiles and TRANSITION references.
62
63 The database is structured per star_name and per transition definition.
64 So for instance, you will find the file whya_co32_APEX.fits in a dict
65 under
66 db['whya']['TRANSITION=12C16O 0 3 0 0 0 2 0 0 APEX 0.0']
67 where you will find
68 dict([('whya_co32_APEX.fits',None)])
69
70 The None refers to the value of the dictionary entry for that file.
71 This is replaced by the fit results of the line when
72 >>> db.fitLP(filename='whya_co32_Maercker_new_JCMT.fits')
73 is ran. The method redoes the fit by default, regardless of there being
74 an entry for it already. Any required fitting parameters can be passed
75 along to the function (see LPTools.fitLP()). CC loads fit results from
76 the database.
77
78 If multiple data files are available for the same star, the same
79 transition and the same telescope, the multiple data files will also be
80 contained in this dict. Note that the transition definition is quite
81 specific: 1 space between entries, and 11 entries total. It is
82 identical to the input in your combocode inputfile (excluding the final
83 number n_quad, which is model input and has nothing to do with your
84 data).
85
86 Lastly, you can do multiple things with this database.
87 >>> db.addData(star_name='whya',\
88 trans='TRANSITION=12C16O 0 3 0 0 0 2 0 0 JCMT 0.0',\
89 filename='whya_co32_Maercker_new_JCMT.fits')
90 will add a new entry to the database if it is not there. This is useful
91 for those molecules/filenames which have not been automatically found
92 by the parseFolder method, because
93 with this method you can still add them whichever way you want.
94
95 >>> db.removeData(star_name='whya',\
96 trans='TRANSITION=12C16O 0 3 0 0 0 2 0 0 JCMT 0.0')
97 removes a whole transition from the database. Alternatively
98
99 >>> db.removeData(star_name='whya',\
100 filename='whya_co32_Maercker_new_JCMT.fits')
101 removes a single filename from the database. (does not delete the file)
102
103 And if you want to know which datafiles in your data folder are not yet
104 in the database (so you know which you have to add manually):
105 >>> db.checkFolder()
106 prints them out for you.
107
108 Remember, every time you add or remove data or do a parseFolder(), you
109 have to save the contents of your database in python to the hard disk
110 by doing:
111 >>> db.sync()
112
113 As long as you don't do this, the hard disk version of the datafile
114 will remain unchanged.
115
116 @keyword db_name: The name of the database. The default only is used by
117 ComboCode.
118
119 (default: radio_data.db)
120 @type db_name: str
121 @keyword db_path: The path to the db if not the default dradio in
122 Path.dat
123
124 (default: None)
125 @type db_path: str
126
127
128 """
129
130 if db_path is None:
131 fn = os.path.join(cc.path.dradio,db_name)
132 else:
133 fn = os.path.join(db_path,db_name)
134 super(Radio,self).__init__(db_path=fn)
135
136
137
139
140 '''
141 Parse the db folder and add filenames to recognized transitions.
142
143 Includes:
144 - CO and its isotopologues
145 - SiO and its isotopologues
146 - SiS and its isotopologues
147 - H2O and pH2O, and their isotopologues
148 - SO2 and SO
149 - CS
150 - PO and PN
151 - H2CO
152
153 Particularly excludes: (because no naming convention/too complex)
154 - CN
155 - HCN and its isotopologues
156 - HCO+
157
158 '''
159
160 ggf = sorted(glob(self.folder+'/*.dat') + glob(self.folder+'/*.fits'))
161 ggf = [os.path.split(ff)[1] for ff in ggf]
162 ggf = [ff for ff in ggf if '_' in ff]
163 ggf = [ff for ff in ggf if ff[0] != '_']
164 fsplit = [ff.split('_') for ff in ggf]
165
166
167 stars = [ff.pop(0) for ff in fsplit]
168
169
170 telescopes = [os.path.splitext(ff.pop(-1))[0] for ff in fsplit]
171
172
173
174
175 vibs = [(ff[0][0] == 'v' and len(ff[0]) == 2) and ff.pop(0)[1] or '0'
176 for ff in fsplit]
177
178
179 molec_trans = ['_'.join(ff) for ff in fsplit]
180
181 defmolecs = DataIO.getInputData(keyword='TYPE_SHORT',\
182 filename='Molecule.dat')
183 defmolecs_short = DataIO.getInputData(keyword='NAME_SHORT',\
184 filename='Molecule.dat')
185 defspec_indices = DataIO.getInputData(keyword='SPEC_INDICES',\
186 filename='Molecule.dat')
187 for ff,s,mt,tel,vib in zip(ggf,stars,molec_trans,telescopes,vibs):
188 if vib not in ['1','0']:
189 continue
190 this_dm,this_dms,this_dsi = None, None, None
191 for dm,dms,dsi in zip(defmolecs,defmolecs_short,defspec_indices):
192 if mt[:len(dms)] == dms:
193 this_dm = dm
194 this_dms = dms
195 this_dsi = dsi
196 break
197
198
199
200
201
202 evil = ['cn','hcn','h13cn','hco+','oh']
203
204
205
206 if this_dm == None or this_dms in evil or this_dsi not in [0,1,2]:
207 continue
208
209 mt = mt.replace(this_dms,'',1)
210
211
212
213 p = re.compile('\d{2,}|-[\d\.?\d_?]+-[\d\.?\d_?]+')
214 parsed = p.match(mt).group().rstrip('_').split('-')
215
216
217
218
219
220 if len(parsed) == 1:
221
222 if len(parsed[0]) == 6 and this_dsi == 1:
223 tupper = parsed[0][0:3]
224 tlower = parsed[0][3:6]
225
226 elif len(parsed[0]) == 4 and this_dsi == 2:
227 tupper = parsed[0][0:2]
228 tlower = parsed[0][2:4]
229
230
231 elif len(parsed[0]) == 6 and this_dsi == 0:
232 tupper = [parsed[0][0:3]]
233 tlower = [parsed[0][3:6]]
234 elif len(parsed[0]) == 5 and this_dsi == 0:
235 tupper = [parsed[0][0:3]]
236 tlower = [parsed[0][3:5]]
237 elif len(parsed[0]) == 4 and this_dsi == 0:
238 tupper = [parsed[0][0:2]]
239 tlower = [parsed[0][2:4]]
240 elif len(parsed[0]) == 3 and this_dsi == 0:
241 tupper = [parsed[0][0:2]]
242 tlower = [parsed[0][2:3]]
243 elif len(parsed[0]) == 2 and this_dsi == 0:
244 tupper = [parsed[0][0]]
245 tlower = [parsed[0][1]]
246
247
248 else:
249 continue
250
251
252
253
254 elif len(parsed) == 3:
255
256 tupper = parsed[1].split('_')
257 tlower = parsed[2].split('_')
258
259
260 if (this_dsi == 0 and len(tupper) != 1) or \
261 (this_dsi == 1 and len(tupper) != 3) or \
262 (this_dsi == 2 and len(tupper) != 2):
263 continue
264
265
266 else:
267 continue
268
269 if this_dsi == 2:
270 trans = 'TRANSITION=%s %s %s %s 0 %s %s %s 0 %s 0.0'\
271 %(this_dm,vib,tupper[0],tupper[1],\
272 vib,tlower[0],tlower[1],tel)
273 elif this_dsi == 1:
274 trans = 'TRANSITION=%s 0.0'\
275 %(' '.join([this_dm,vib,tupper[0],tupper[1],tupper[2],\
276 vib,tlower[0],tlower[1],tlower[2],tel]))
277 elif this_dsi == 0:
278 trans = 'TRANSITION=%s %s %s 0 0 %s %s 0 0 %s 0.0'\
279 %(this_dm,vib,tupper[0],vib,tlower[0],tel)
280
281 self.addData(star_name=s,trans=trans,filename=ff)
282
283
284
286
287 '''
288 Add a new star to the database. A check is ran to see if the requested
289 star is known in ~/ComboCode/usr/Star.dat.
290
291 @param star_name: The name of the star (from Star.dat)
292 @type star_name: str
293
294 '''
295
296 if star_name in self.keys():
297 return
298 known_stars = DataIO.getInputData(keyword='STAR_NAME')
299 if star_name not in known_stars:
300 print('Requested star %s is unknown in %s/Star.dat.'\
301 %(star_name,cc.path.usr))
302 return
303 self[star_name] = dict()
304
305
306
307 - def addData(self,star_name,filename,trans):
308
309 '''
310 Add a new file to the database with given transition definition. If the
311 transition is already present in the database for this particular star, the
312 filename is added to the dictionary for that transition.
313
314 A single transition for a star can thus have multiple filenames associated
315 with it!
316
317 Note that this method does NOT automatically sync (ie save changes to the
318 hard disk) the database. That must be done through an additional flag to
319 avoid excess overhead.
320
321 The transition definition is checked for correctness: single spaces between
322 entries in the defintion, and a total of 11 entries: 1 molecule (shorthand)
323 8 quantum numbers, 1 offset
324
325 The fit is not done here. Instead the filename key is added to the
326 transition's dictionary with value None.
327
328 @param star_name: The name of the star for which to add the data.
329 @type star_name: str
330 @param filename: The filename of the radio data to be added. Must be in the
331 db folder! Only the filename is used. Any folder path is
332 cut.
333 @type filename: str
334 @param trans: The transition definition. Must be in the correct format!
335 @type trans: str
336
337 '''
338
339 filename = os.path.split(filename)[1]
340 if not self.has_key(star_name):
341 self.addStar(star_name)
342
343 entries = trans.split()
344 trans = ' '.join(entries)
345
346 defmolecs = DataIO.getInputData(keyword='TYPE_SHORT',\
347 filename='Molecule.dat')
348 this_molec = entries[0].replace('TRANSITION=','',1)
349 if this_molec not in defmolecs:
350 print('Molecule %s unrecognized.'%this_molec)
351 return
352
353 if len(entries) != 11:
354 print('Incorrect transition definition:')
355 print(trans)
356 return
357
358 if self[star_name].has_key(trans):
359 if filename not in self[star_name][trans].keys():
360 self[star_name][trans][filename] = None
361 self.addChangedKey(star_name)
362 else:
363 self[star_name][trans] = dict([(filename,None)])
364 self.addChangedKey(star_name)
365
366
367
368 - def removeData(self,star_name,filename='',trans=''):
369
370 '''
371 Remove filenames or transition definitions from the database for a given
372 star_name. Either filename or trans must be given.
373
374 @param star_name: Name of the star for which data or trans is removed
375 @type star_name: str
376
377 @keyword filename: data filename to be removed. If empty string, a
378 transition definition must be given (ie remove
379 everything for a given transition definition)
380
381 (default: '')
382 @type filename: str
383 @keyword trans: Transition definition to be removed. Irrelevant if a
384 filename is also given (latter takes precedence)
385
386 (default: '')
387 @type trans: str
388
389 '''
390
391 if not self.has_key(star_name):
392 print('Star %s not found in database.'%star_name)
393 return
394
395
396 if filename:
397 trans = ''
398 filename = os.path.split(filename)[1]
399 elif not trans:
400 print('No filename or transition given. No change is made to the'+\
401 'radio database')
402 return
403
404 if filename:
405 for k,v in self[star_name].items():
406 if filename in v.keys():
407 del self[star_name][k][filename]
408 self.addChangedKey(star_name)
409
410
411
412 if not self[star_name][k]:
413 del self[star_name][k]
414 self.addChangedKey(star_name)
415 break
416
417 else:
418 if self[star_name].has_key(trans):
419 del self[star_name][trans]
420 self.addChangedKey(star_name)
421
422
423
425
426 '''
427 Check which radio datafiles are not included in the db present in its
428 folder.
429
430 '''
431
432 ggf = sorted(glob(self.folder+'/*.dat') + glob(self.folder+'/*.fits'))
433 ggf = [os.path.split(ff)[1] for ff in ggf]
434 ggf = [ff for ff in ggf if ff[0] != '_']
435
436 dbfiles = []
437 for s in self.keys():
438 for v in self[s].values():
439 dbfiles.extend(v)
440
441 print('These files have not been found in the database:')
442 for ff in ggf:
443 if ff not in dbfiles:
444 print(ff)
445
446
448
449 '''
450 Convert an existing Radio database from the old format:
451 db[star][trans] = list
452 to the new format:
453 db[trans][trans][filename] = fit dict
454
455 '''
456
457 for ss in self.keys():
458 for tt in self[ss].keys():
459 nd = dict()
460 for ff in self[ss][tt]:
461 nd[ff] = None
462 self[ss][tt] = nd
463 self.addChangedKey(ss)
464
465
466 - def filterContents(self,star_name=''):
467
468 '''
469 Filter out the fit results from the contents of the database, eg for
470 printing purposes.
471
472 If star_name is given, only the star will be printed. Otherwise the
473 full database is printed.
474
475 @keyword star_name: The requested star. Default is to print all of them
476
477 (default: '')
478 @type star_name: str
479
480 @return: The dictionary with just the relevant contents (trans and file
481 names)
482 @rtype: dict
483
484 '''
485
486 def __removeFits(self,ss):
487
488 '''
489 Helper method to retrieve only the filenames for every transition
490 of given star.
491
492 '''
493
494 fdd = dict()
495 for tt in self[ss].keys():
496 fdd[tt] = [ff for ff in sorted(self[ss][tt].keys())]
497 return fdd
498
499
500 pdd = dict()
501 if star_name:
502 pdd[star_name] = __removeFits(self,star_name)
503 else:
504 for ss in self.keys():
505 pdd[ss] = __removeFits(self,ss)
506
507 return pdd
508
509
510
511
512 - def fitLP(self,star_name='',filename='',trans='',replace=0,**kwargs):
513
514 '''
515 Fit the data line profiles with a soft parabola or a Gaussian according
516 to LPTools.fitLP() (see that method for further details).
517
518 Input keywords require one of these:
519 - no keys: all lines in db are fitted
520 - star_name: All lines for star are fitted
521 - star_name, trans: all lines for transition of star are fitted
522 - star_name, filename: only this filename is fitted
523
524 The fit is NOT redone by default, if there is an entry in db already.
525 You can force a replacement fit by turning replace on.
526
527 Note that this method does NOT automatically sync (ie save changes to
528 the hard disk) the database. That must be done through an additional
529 flag to avoid excess overhead.
530
531 @keyword star_name: The name of the star for which to add the data. If
532 not given, all files in db are fitted.
533
534 (default: '')
535 @type star_name: str
536 @keyword filename: The filename of the radio data to be fitted. Must be
537 in the db folder! Only the filename is used. Any
538 folder path is cut.
539
540 (default: '')
541 @type filename: str
542 @keyword trans: The transition definition. Must be in the correct
543 format!
544
545 (default: '')
546 @type trans: str
547 @keyword replace: Replace existing fits with a new one. If off, only
548 lines that have not been fitted yet are added.
549
550 (default: 0)
551 @type replace: bool
552 @keyword kwargs: Any additional keywords that are passed on to
553 LPTools.fitLP()
554 @type kwargs: dict
555
556
557 '''
558
559 def __fitLP(self,ss,tt,ff,replace):
560
561 '''
562 Helper method giving star_name, transition, and filename for the
563 fitter to do and then remember to save when sync() is ran.
564
565 Accessed only by Radio().fitLP().
566
567 '''
568
569 if not replace and self[ss][tt][ff]: return
570 fn = os.path.join(self.folder,ff)
571 try:
572 fitr = LPTools.fitLP(filename=fn,**kwargs)
573 except ValueError:
574 print 'Line profile fit in %s failed.'%ff
575 fitr = None
576 self[ss][tt][ff] = fitr
577 self.addChangedKey(ss)
578
579
580 if filename and not star_name:
581 star_name = os.path.split(filename)[1].split('_')[0]
582
583 if trans and not star_name:
584 print 'Define star_name to continue.'
585 return
586
587 if star_name and not self.has_key(star_name):
588 print 'Star not found.'
589 return
590
591
592 if not star_name:
593 for ss in self.keys():
594 for tt in self[ss].keys():
595 for ff in self[ss][tt].keys():
596 __fitLP(self,ss,tt,ff,replace)
597
598
599 elif trans:
600 if trans not in self[star_name].keys():
601 print 'Transition not found.'
602 return
603 for ff in self[star_name][trans].keys():
604 __fitLP(self,star_name,trans,ff,replace)
605
606
607 elif filename:
608 filename = os.path.split(filename)[1]
609 for k,v in self[star_name].items():
610 if filename in v.keys():
611 trans = k
612 break
613 if not trans:
614 print 'Filename not found.'
615 return
616 __fitLP(self,star_name,trans,filename,replace)
617
618
619 else:
620 for tt in self[star_name].keys():
621 for ff in self[star_name][tt].keys():
622 __fitLP(self,star_name,tt,ff,replace)
623