Package ComboCode :: Package cc :: Package data :: Module Radio
[hide private]
[frames] | no frames]

Source Code for Module ComboCode.cc.data.Radio

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  Toolbox for creating and maintaining a radio-data database. 
  5   
  6  Author: R. Lombaert 
  7   
  8  """ 
  9   
 10  import os, re 
 11  from glob import glob 
 12   
 13  import cc.path 
 14  from cc.tools.io import DataIO 
 15  from cc.tools.io.Database import Database 
 16  from cc.data import LPTools 
 17   
 18   
19 -class Radio(Database):
20 21 """ 22 Environment with several tools to load and save a database for Radio data. 23 24 """ 25
26 - def __init__(self,db_name='radio_data.db',db_path=None):
27 28 """ 29 Initializing an instance of Radio. 30 31 Inherits from the Database class and adds a bit of functionality for 32 the particular case of radio data. 33 34 Initializing this object returns a Database() class that can contain 35 radio data in the given path. If no db is already present, a new one is 36 created from scratch at given location. 37 38 The name of the database is radio_data.db by default. Can be changed if 39 needed, but ComboCode does not work with this other name. 40 41 If nothing more is requested, a new database is essentially empty. 42 43 The option to parse the content of the target folder is available: This 44 will add entries to the database for the stars in the folder and the 45 'simple' molecules and transitions. 46 47 Once a db is created, you can perform several methods on the database 48 specific to radio data management. 49 50 Examples: 51 For data located in dradio in Path.dat 52 >>> from cc.data import Radio 53 >>> db = Radio.Radio() 54 >>> db.sync() 55 56 This will create a database, automatically search your data folder for 57 data files that the script can recognize, and save them in the 58 database. The final command ( .sync() ) will then save the database to 59 your hard disk in the same folder, under 'radio_data.db'. Later when 60 you run Radio.Radio() again, it will load that same database that still 61 contains all those datafiles and TRANSITION references. 62 63 The database is structured per star_name and per transition definition. 64 So for instance, you will find the file whya_co32_APEX.fits in a dict 65 under 66 db['whya']['TRANSITION=12C16O 0 3 0 0 0 2 0 0 APEX 0.0'] 67 where you will find 68 dict([('whya_co32_APEX.fits',None)]) 69 70 The None refers to the value of the dictionary entry for that file. 71 This is replaced by the fit results of the line when 72 >>> db.fitLP(filename='whya_co32_Maercker_new_JCMT.fits') 73 is ran. The method redoes the fit by default, regardless of there being 74 an entry for it already. Any required fitting parameters can be passed 75 along to the function (see LPTools.fitLP()). CC loads fit results from 76 the database. 77 78 If multiple data files are available for the same star, the same 79 transition and the same telescope, the multiple data files will also be 80 contained in this dict. Note that the transition definition is quite 81 specific: 1 space between entries, and 11 entries total. It is 82 identical to the input in your combocode inputfile (excluding the final 83 number n_quad, which is model input and has nothing to do with your 84 data). 85 86 Lastly, you can do multiple things with this database. 87 >>> db.addData(star_name='whya',\ 88 trans='TRANSITION=12C16O 0 3 0 0 0 2 0 0 JCMT 0.0',\ 89 filename='whya_co32_Maercker_new_JCMT.fits') 90 will add a new entry to the database if it is not there. This is useful 91 for those molecules/filenames which have not been automatically found 92 by the parseFolder method, because 93 with this method you can still add them whichever way you want. 94 95 >>> db.removeData(star_name='whya',\ 96 trans='TRANSITION=12C16O 0 3 0 0 0 2 0 0 JCMT 0.0') 97 removes a whole transition from the database. Alternatively 98 99 >>> db.removeData(star_name='whya',\ 100 filename='whya_co32_Maercker_new_JCMT.fits') 101 removes a single filename from the database. (does not delete the file) 102 103 And if you want to know which datafiles in your data folder are not yet 104 in the database (so you know which you have to add manually): 105 >>> db.checkFolder() 106 prints them out for you. 107 108 Remember, every time you add or remove data or do a parseFolder(), you 109 have to save the contents of your database in python to the hard disk 110 by doing: 111 >>> db.sync() 112 113 As long as you don't do this, the hard disk version of the datafile 114 will remain unchanged. 115 116 @keyword db_name: The name of the database. The default only is used by 117 ComboCode. 118 119 (default: radio_data.db) 120 @type db_name: str 121 @keyword db_path: The path to the db if not the default dradio in 122 Path.dat 123 124 (default: None) 125 @type db_path: str 126 127 128 """ 129 130 if db_path is None: 131 fn = os.path.join(cc.path.dradio,db_name) 132 else: 133 fn = os.path.join(db_path,db_name) 134 super(Radio,self).__init__(db_path=fn)
135 136 137
138 - def parseFolder(self):
139 140 ''' 141 Parse the db folder and add filenames to recognized transitions. 142 143 Includes: 144 - CO and its isotopologues 145 - SiO and its isotopologues 146 - SiS and its isotopologues 147 - H2O and pH2O, and their isotopologues 148 - SO2 and SO 149 - CS 150 - PO and PN 151 - H2CO 152 153 Particularly excludes: (because no naming convention/too complex) 154 - CN 155 - HCN and its isotopologues 156 - HCO+ 157 158 ''' 159 160 ggf = sorted(glob(self.folder+'/*.dat') + glob(self.folder+'/*.fits')) 161 ggf = [os.path.split(ff)[1] for ff in ggf] 162 ggf = [ff for ff in ggf if '_' in ff] 163 ggf = [ff for ff in ggf if ff[0] != '_'] 164 fsplit = [ff.split('_') for ff in ggf] 165 166 #-- Convention: star names come first 167 stars = [ff.pop(0) for ff in fsplit] 168 169 #-- Convention: telescope names are at end of filename before extension 170 telescopes = [os.path.splitext(ff.pop(-1))[0] for ff in fsplit] 171 172 #-- Convention: vibrational states != 0 in second place in filename. 173 # Vibrational states v1 are added automatically. Others are not, for 174 # now. 175 vibs = [(ff[0][0] == 'v' and len(ff[0]) == 2) and ff.pop(0)[1] or '0' 176 for ff in fsplit] 177 178 #-- Convention: molecule names always first in the molecule tag 179 molec_trans = ['_'.join(ff) for ff in fsplit] 180 181 defmolecs = DataIO.getInputData(keyword='TYPE_SHORT',\ 182 filename='Molecule.dat') 183 defmolecs_short = DataIO.getInputData(keyword='NAME_SHORT',\ 184 filename='Molecule.dat') 185 defspec_indices = DataIO.getInputData(keyword='SPEC_INDICES',\ 186 filename='Molecule.dat') 187 for ff,s,mt,tel,vib in zip(ggf,stars,molec_trans,telescopes,vibs): 188 if vib not in ['1','0']: 189 continue 190 this_dm,this_dms,this_dsi = None, None, None 191 for dm,dms,dsi in zip(defmolecs,defmolecs_short,defspec_indices): 192 if mt[:len(dms)] == dms: 193 this_dm = dm 194 this_dms = dms 195 this_dsi = dsi 196 break 197 198 #-- Don't bother with these molecules: not clear how they are used 199 # as input for GASTRoNOoM. 200 #-- Note that the parser can already detect decimal quantum numbers 201 # as required by CN 202 evil = ['cn','hcn','h13cn','hco+','oh'] 203 204 #-- If molecule is not found, or is an evil molecule, or is not of 205 # spectral index type 0, 1, or 2, then skip the entry. 206 if this_dm == None or this_dms in evil or this_dsi not in [0,1,2]: 207 continue 208 209 mt = mt.replace(this_dms,'',1) 210 211 #-- Create the regular expression for the filename convention 212 # Allows co32, h2o123132, h2o-1_2_3-1_3_2 213 p = re.compile('\d{2,}|-[\d\.?\d_?]+-[\d\.?\d_?]+') 214 parsed = p.match(mt).group().rstrip('_').split('-') 215 #-- Note that if a NoneType AttributeError occurs, it's probably 216 # because the molecule has not been added to the convention list 217 # yet, and is therefore considered evil. Added it to the evil list 218 219 #-- if parsed contains one element, several options are possible 220 if len(parsed) == 1: 221 #-- dsi == 1 can only take 6 quantum numbers, 3 per level 222 if len(parsed[0]) == 6 and this_dsi == 1: 223 tupper = parsed[0][0:3] 224 tlower = parsed[0][3:6] 225 #-- dsi == 2 can only take 4 quantum numbers, 2 per level 226 elif len(parsed[0]) == 4 and this_dsi == 2: 227 tupper = parsed[0][0:2] 228 tlower = parsed[0][2:4] 229 #-- dsi == 0 can only take 2 quantum numbers, 1 per level 230 # several combinations are possible for multiple digit levels 231 elif len(parsed[0]) == 6 and this_dsi == 0: 232 tupper = [parsed[0][0:3]] 233 tlower = [parsed[0][3:6]] 234 elif len(parsed[0]) == 5 and this_dsi == 0: 235 tupper = [parsed[0][0:3]] 236 tlower = [parsed[0][3:5]] 237 elif len(parsed[0]) == 4 and this_dsi == 0: 238 tupper = [parsed[0][0:2]] 239 tlower = [parsed[0][2:4]] 240 elif len(parsed[0]) == 3 and this_dsi == 0: 241 tupper = [parsed[0][0:2]] 242 tlower = [parsed[0][2:3]] 243 elif len(parsed[0]) == 2 and this_dsi == 0: 244 tupper = [parsed[0][0]] 245 tlower = [parsed[0][1]] 246 #-- Any other combinations are not possible, and must be 247 # separated with '_', '-' in the filename 248 else: 249 continue 250 251 #-- If there are three elements, the second and third give the 252 # upper and lower level respectively, each quantum number 253 # separated by '_' 254 elif len(parsed) == 3: 255 #-- parsed[0] is just an empty string 256 tupper = parsed[1].split('_') 257 tlower = parsed[2].split('_') 258 #-- Double check if the correct amount of quantum numbers are 259 # given for each type 260 if (this_dsi == 0 and len(tupper) != 1) or \ 261 (this_dsi == 1 and len(tupper) != 3) or \ 262 (this_dsi == 2 and len(tupper) != 2): 263 continue 264 265 #-- If neither, continue without adding anything to the db 266 else: 267 continue 268 269 if this_dsi == 2: 270 trans = 'TRANSITION=%s %s %s %s 0 %s %s %s 0 %s 0.0'\ 271 %(this_dm,vib,tupper[0],tupper[1],\ 272 vib,tlower[0],tlower[1],tel) 273 elif this_dsi == 1: 274 trans = 'TRANSITION=%s 0.0'\ 275 %(' '.join([this_dm,vib,tupper[0],tupper[1],tupper[2],\ 276 vib,tlower[0],tlower[1],tlower[2],tel])) 277 elif this_dsi == 0: 278 trans = 'TRANSITION=%s %s %s 0 0 %s %s 0 0 %s 0.0'\ 279 %(this_dm,vib,tupper[0],vib,tlower[0],tel) 280 281 self.addData(star_name=s,trans=trans,filename=ff)
282 283 284
285 - def addStar(self,star_name):
286 287 ''' 288 Add a new star to the database. A check is ran to see if the requested 289 star is known in ~/ComboCode/usr/Star.dat. 290 291 @param star_name: The name of the star (from Star.dat) 292 @type star_name: str 293 294 ''' 295 296 if star_name in self.keys(): 297 return 298 known_stars = DataIO.getInputData(keyword='STAR_NAME') 299 if star_name not in known_stars: 300 print('Requested star %s is unknown in %s/Star.dat.'\ 301 %(star_name,cc.path.usr)) 302 return 303 self[star_name] = dict()
304 305 306
307 - def addData(self,star_name,filename,trans):
308 309 ''' 310 Add a new file to the database with given transition definition. If the 311 transition is already present in the database for this particular star, the 312 filename is added to the dictionary for that transition. 313 314 A single transition for a star can thus have multiple filenames associated 315 with it! 316 317 Note that this method does NOT automatically sync (ie save changes to the 318 hard disk) the database. That must be done through an additional flag to 319 avoid excess overhead. 320 321 The transition definition is checked for correctness: single spaces between 322 entries in the defintion, and a total of 11 entries: 1 molecule (shorthand) 323 8 quantum numbers, 1 offset 324 325 The fit is not done here. Instead the filename key is added to the 326 transition's dictionary with value None. 327 328 @param star_name: The name of the star for which to add the data. 329 @type star_name: str 330 @param filename: The filename of the radio data to be added. Must be in the 331 db folder! Only the filename is used. Any folder path is 332 cut. 333 @type filename: str 334 @param trans: The transition definition. Must be in the correct format! 335 @type trans: str 336 337 ''' 338 339 filename = os.path.split(filename)[1] 340 if not self.has_key(star_name): 341 self.addStar(star_name) 342 343 entries = trans.split() 344 trans = ' '.join(entries) 345 346 defmolecs = DataIO.getInputData(keyword='TYPE_SHORT',\ 347 filename='Molecule.dat') 348 this_molec = entries[0].replace('TRANSITION=','',1) 349 if this_molec not in defmolecs: 350 print('Molecule %s unrecognized.'%this_molec) 351 return 352 353 if len(entries) != 11: 354 print('Incorrect transition definition:') 355 print(trans) 356 return 357 358 if self[star_name].has_key(trans): 359 if filename not in self[star_name][trans].keys(): 360 self[star_name][trans][filename] = None 361 self.addChangedKey(star_name) 362 else: 363 self[star_name][trans] = dict([(filename,None)]) 364 self.addChangedKey(star_name)
365 366 367
368 - def removeData(self,star_name,filename='',trans=''):
369 370 ''' 371 Remove filenames or transition definitions from the database for a given 372 star_name. Either filename or trans must be given. 373 374 @param star_name: Name of the star for which data or trans is removed 375 @type star_name: str 376 377 @keyword filename: data filename to be removed. If empty string, a 378 transition definition must be given (ie remove 379 everything for a given transition definition) 380 381 (default: '') 382 @type filename: str 383 @keyword trans: Transition definition to be removed. Irrelevant if a 384 filename is also given (latter takes precedence) 385 386 (default: '') 387 @type trans: str 388 389 ''' 390 391 if not self.has_key(star_name): 392 print('Star %s not found in database.'%star_name) 393 return 394 395 #-- If a filename is given, remove that, regardless of a given transition. 396 if filename: 397 trans = '' 398 filename = os.path.split(filename)[1] 399 elif not trans: 400 print('No filename or transition given. No change is made to the'+\ 401 'radio database') 402 return 403 404 if filename: 405 for k,v in self[star_name].items(): 406 if filename in v.keys(): 407 del self[star_name][k][filename] 408 self.addChangedKey(star_name) 409 410 #-- Remove the transition if no filenames are associated 411 # with it anymore, i.e. the dict is empty. 412 if not self[star_name][k]: 413 del self[star_name][k] 414 self.addChangedKey(star_name) 415 break 416 417 else: 418 if self[star_name].has_key(trans): 419 del self[star_name][trans] 420 self.addChangedKey(star_name)
421 422 423
424 - def checkFolder(self):
425 426 ''' 427 Check which radio datafiles are not included in the db present in its 428 folder. 429 430 ''' 431 432 ggf = sorted(glob(self.folder+'/*.dat') + glob(self.folder+'/*.fits')) 433 ggf = [os.path.split(ff)[1] for ff in ggf] 434 ggf = [ff for ff in ggf if ff[0] != '_'] 435 436 dbfiles = [] 437 for s in self.keys(): 438 for v in self[s].values(): 439 dbfiles.extend(v) 440 441 print('These files have not been found in the database:') 442 for ff in ggf: 443 if ff not in dbfiles: 444 print(ff)
445 446
447 - def convertOldDb(self):
448 449 ''' 450 Convert an existing Radio database from the old format: 451 db[star][trans] = list 452 to the new format: 453 db[trans][trans][filename] = fit dict 454 455 ''' 456 457 for ss in self.keys(): 458 for tt in self[ss].keys(): 459 nd = dict() 460 for ff in self[ss][tt]: 461 nd[ff] = None 462 self[ss][tt] = nd 463 self.addChangedKey(ss)
464 465
466 - def filterContents(self,star_name=''):
467 468 ''' 469 Filter out the fit results from the contents of the database, eg for 470 printing purposes. 471 472 If star_name is given, only the star will be printed. Otherwise the 473 full database is printed. 474 475 @keyword star_name: The requested star. Default is to print all of them 476 477 (default: '') 478 @type star_name: str 479 480 @return: The dictionary with just the relevant contents (trans and file 481 names) 482 @rtype: dict 483 484 ''' 485 486 def __removeFits(self,ss): 487 488 ''' 489 Helper method to retrieve only the filenames for every transition 490 of given star. 491 492 ''' 493 494 fdd = dict() 495 for tt in self[ss].keys(): 496 fdd[tt] = [ff for ff in sorted(self[ss][tt].keys())] 497 return fdd
498 499 500 pdd = dict() 501 if star_name: 502 pdd[star_name] = __removeFits(self,star_name) 503 else: 504 for ss in self.keys(): 505 pdd[ss] = __removeFits(self,ss) 506 507 return pdd
508 509 510 511
512 - def fitLP(self,star_name='',filename='',trans='',replace=0,**kwargs):
513 514 ''' 515 Fit the data line profiles with a soft parabola or a Gaussian according 516 to LPTools.fitLP() (see that method for further details). 517 518 Input keywords require one of these: 519 - no keys: all lines in db are fitted 520 - star_name: All lines for star are fitted 521 - star_name, trans: all lines for transition of star are fitted 522 - star_name, filename: only this filename is fitted 523 524 The fit is NOT redone by default, if there is an entry in db already. 525 You can force a replacement fit by turning replace on. 526 527 Note that this method does NOT automatically sync (ie save changes to 528 the hard disk) the database. That must be done through an additional 529 flag to avoid excess overhead. 530 531 @keyword star_name: The name of the star for which to add the data. If 532 not given, all files in db are fitted. 533 534 (default: '') 535 @type star_name: str 536 @keyword filename: The filename of the radio data to be fitted. Must be 537 in the db folder! Only the filename is used. Any 538 folder path is cut. 539 540 (default: '') 541 @type filename: str 542 @keyword trans: The transition definition. Must be in the correct 543 format! 544 545 (default: '') 546 @type trans: str 547 @keyword replace: Replace existing fits with a new one. If off, only 548 lines that have not been fitted yet are added. 549 550 (default: 0) 551 @type replace: bool 552 @keyword kwargs: Any additional keywords that are passed on to 553 LPTools.fitLP() 554 @type kwargs: dict 555 556 557 ''' 558 559 def __fitLP(self,ss,tt,ff,replace): 560 561 ''' 562 Helper method giving star_name, transition, and filename for the 563 fitter to do and then remember to save when sync() is ran. 564 565 Accessed only by Radio().fitLP(). 566 567 ''' 568 569 if not replace and self[ss][tt][ff]: return 570 fn = os.path.join(self.folder,ff) 571 try: 572 fitr = LPTools.fitLP(filename=fn,**kwargs) 573 except ValueError: 574 print 'Line profile fit in %s failed.'%ff 575 fitr = None 576 self[ss][tt][ff] = fitr 577 self.addChangedKey(ss)
578 579 580 if filename and not star_name: 581 star_name = os.path.split(filename)[1].split('_')[0] 582 583 if trans and not star_name: 584 print 'Define star_name to continue.' 585 return 586 587 if star_name and not self.has_key(star_name): 588 print 'Star not found.' 589 return 590 591 #-- No star_name given, so run through all stars, transitions and files 592 if not star_name: 593 for ss in self.keys(): 594 for tt in self[ss].keys(): 595 for ff in self[ss][tt].keys(): 596 __fitLP(self,ss,tt,ff,replace) 597 598 #-- star_name given. If trans is given, run through all its filenames 599 elif trans: 600 if trans not in self[star_name].keys(): 601 print 'Transition not found.' 602 return 603 for ff in self[star_name][trans].keys(): 604 __fitLP(self,star_name,trans,ff,replace) 605 606 #-- star_name given. If trans is not given, but filename is, fit it. 607 elif filename: 608 filename = os.path.split(filename)[1] 609 for k,v in self[star_name].items(): 610 if filename in v.keys(): 611 trans = k 612 break 613 if not trans: 614 print 'Filename not found.' 615 return 616 __fitLP(self,star_name,trans,filename,replace) 617 618 #-- star_name given. No trans/filename given. Fit everything for star 619 else: 620 for tt in self[star_name].keys(): 621 for ff in self[star_name][tt].keys(): 622 __fitLP(self,star_name,tt,ff,replace) 623