
Source Code for Module ComboCode.cc.tools.io.DataIO

# -*- coding: utf-8 -*-

"""
Module for reading and writing data files as well as parsing of information.

Author: R. Lombaert

"""

import os, sys
import subprocess
from glob import glob
import numpy as np
from scipy import array,zeros
from PyPDF2 import PdfFileMerger
from matplotlib import mlab

import cc.path
def read(func,module=sys.modules[__name__],return_func=0,*args,**kwargs):

    '''
    Takes any function given as a str (preceding modules separated with a dot
    '.') and passes any additional args and kwargs to the function found in
    either the DataIO module or the given module.

    Note that numpy is imported in DataIO as np, and can also be requested
    this way.

    @param func: The requested function. Can also be given as a function, in
                 which case no search for the function is done, and this
                 returns that function's results.
    @type func: str/function

    @keyword module: The home module of the function. Default is DataIO, and
                     any module therein can be accessed by passing module.func
                     to the keyword.

                     (default: DataIO)
    @type module: module
    @keyword return_func: Return the function itself, rather than calling it.

                          (default: 0)
    @type return_func: bool

    @return: the function's output is returned.
    @rtype: anything. Or function.

    '''

    #-- Function not yet found. Search it.
    if isinstance(func,str):
        #-- Recursively find the function of loaded modules in the given
        #   module, or a function of the DataIO module itself if no '.'
        for fstr in func.split('.'):
            if fstr == 'DataIO':
                module = sys.modules[__name__]
                continue
            module = getattr(module,fstr)

    #-- The function was passed directly: no search needed.
    else:
        module = func

    #-- Only return the function, don't call it
    if return_func:
        return module

    #-- Call the function and return the results.
    return module(*args,**kwargs)
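#-- Illustrative usage sketch of read() (the argument values below are
#   assumptions, not part of the module):
#   >>> read('np.mean',a=[1.,2.,3.])              # numpy function by name -> 2.0
#   >>> read('convertFloat',string='5.2')         # DataIO function by name -> 5.2
#   >>> read(np.mean,return_func=1) is np.mean    # function passed directly -> True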
def findKey(i,data,key):

    '''
    Find the index of the line that contains the first occurrence of a
    keyword.

    The search is case-insensitive.

    @param i: The starting index of the search
    @type i: int
    @param data: The data read with readFile. Either split on a delimiter, or
                 the full line as a str.
    @type data: list[list]/list[str]
    @param key: The keyword that is searched
    @type key: str

    @return: The index of the line that contains the key for the first time
    @rtype: int

    '''

    #-- The search is case-insensitive.
    key = key.upper()

    #-- Determine if lines are given as strings or as lists of split strings.
    if isinstance(data[0],str):
        key_absent = lambda x: x.upper().find(key) == -1
    else:
        key_absent = lambda x: ' '.join(x).upper().find(key) == -1

    #-- key_absent returns True as long as it cannot find the keyword. The
    #   while loop ensures we don't have to format the entire file, which can
    #   be arduous if the file is large.
    while key_absent(data[i]):
        i += 1

    return i
def getKeyData(incr,filename,keyword,single=1):

    """
    Search a data file with data in a single (or multiple) columns, separated
    by comment lines containing the type of data.

    The data returned follow the line that contains the key, unless incr is
    set to 0. In this case, the line that contains the data is returned in
    its entirety, and single is put to 0.

    This method is often used for extracting MCMax output. In that case, for
    radius and theta incr is usually the grid size (NRAD and NTHETA
    respectively), and for any other quantity incr is NRAD*NTHETA (e.g. for
    denstemp.dat). incr==0 can be used to extract input values from log.dat.

    @param incr: length of the data after the key that is required.
                 Put this keyword to zero if you are extracting a number
                 from one line that contains the keyword itself. In that case
                 single is put to 0 so you can take your information from the
                 whole line.
    @type incr: int
    @param filename: name and path of the file searched
    @type filename: string
    @param keyword: the type of information required, always equal to one
                    of the keywords present in the file
    @type keyword: string

    @keyword single: return a list of only the first element on every row.
                     Otherwise the entire line is returned. Off by default if
                     incr == 0.

                     (default: 1)
    @type single: bool

    @return: The requested data
    @rtype: list[]

    """

    data = readFile(filename,' ')

    #-- The line with the key is usually not what we want. So add 1.
    i = findKey(0,data,keyword) + 1

    #-- If incr is 0, we need the line itself, and not just the first value.
    if not incr:
        single = 0
        i -= 1
        incr = 1

    #-- Return a single value (first of the line) or the entire line.
    if single:
        return [float(line[0]) for line in data[i:i+int(incr)]]
    else:
        return [line for line in data[i:i+int(incr)]]
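#-- Illustrative usage sketch of getKeyData(): extract a radial grid from an
#   MCMax-style output file. The filename, keyword and grid size below are
#   hypothetical.
#   >>> rad = getKeyData(incr=60,filename='denstemp.dat',keyword='RADIUS')
#   >>> len(rad)    # one float per grid point -> 60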
def readFortranFile(convert_cols,func=np.loadtxt,*args,**kwargs):

    '''
    Reads a Fortran data file.

    The method is identical to np.loadtxt, but defines a converters dict for
    converting double-precision notation (D) into floats (E).

    Use as np.loadtxt, but leave out the converters argument.

    @param convert_cols: The indices of the columns containing double notation
    @type convert_cols: list

    @keyword func: The read function used. Default is np.loadtxt, alternative
                   is np.genfromtxt. The function requires a converters
                   keyword.

                   (default: np.loadtxt)
    @type func: function

    @return: The np.loadtxt output
    @rtype: array

    '''

    converters = {i: lambda x: float(x.replace('D','E')) for i in convert_cols}
    return func(converters=converters,*args,**kwargs)
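#-- Illustrative usage sketch of readFortranFile(), assuming a hypothetical
#   file 'model.dat' with rows such as "1  1.0D+03  2.5D-01" (columns 1 and 2
#   in Fortran double notation):
#   >>> r, dens = readFortranFile(convert_cols=[1,2],fname='model.dat',\
#                                 usecols=[1,2],unpack=True)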
def getGastronoomOutput(filename,keyword='RADIUS',begin_index=0,\
                        return_array=0,key_index=0):

    """
    Search GASTRoNOoM output for relevant envelope information.

    @param filename: The filename of the relevant output GASTRoNOoM file
    @type filename: string

    @keyword keyword: the type of information required, always equal to one of
                      the keywords present in the output files of GASTRoNOoM

                      (default: 'RADIUS')
    @type keyword: string
    @keyword begin_index: start looking for keyword at row with begin_index

                          (default: 0)
    @type begin_index: int
    @keyword return_array: Return a scipy array rather than a python list

                           (default: 0)
    @type return_array: bool
    @keyword key_index: If 0 it is automatically determined, otherwise this is
                        the column index

                        (default: 0)
    @type key_index: int

    @return: The requested data from the GASTRoNOoM output
    @rtype: list/array

    """

    keyword = keyword.upper()
    data = readFile(filename,' ')
    data_col_1 = [d[0] for d in data]
    key_i = findString(begin_index,data_col_1)
    key_j = findFloat(key_i,data_col_1)
    if not key_index:
        keys = ' '.join([' '.join(d).replace('\n','')
                         for d in data[key_i:key_j]]).split()
        key_index = [key[:len(keyword)].upper() for key in keys].index(keyword)
    #- Data never start on the first line
    #- Starting from 1st float, all floats into list, until EOF OR end of block
    data_i = key_j
    #- Data may end at EOF or before a new block of data (e.g. sphinx)
    data_j = findString(data_i,data_col_1)
    if return_array:
        dd = array([float(line[key_index].replace('D+','E+').replace('D-','E-'))
                    for line in data[data_i:data_j]])
        return dd
    else:
        return [float(line[key_index].replace('D+','E+').replace('D-','E-'))
                for line in data[data_i:data_j]]
def getInputData(path=cc.path.usr,keyword='STAR_NAME',filename='Star.dat',\
                 remove_underscore=0,make_float=1,start_index=1,rindex=None):

    """
    Search ComboCode/usr files for parameters. (Can be applied to other files
    as well.)

    Includes files such as Dust.dat, Star.dat, Indices.dat, Molecule.dat.

    @keyword path: Location of the input file

                   (default: cc.path.usr)
    @type path: string
    @keyword keyword: the type of information required, always equal to one of
                      the keywords present in the "Data" of ComboCode, and
                      automatically also a Star dict keyword

                      (default: STAR_NAME)
    @type keyword: string
    @keyword filename: the filename that includes the wanted information

                       (default: 'Star.dat')
    @type filename: string
    @keyword remove_underscore: remove the underscores from the entries and
                                replace them by spaces.

                                (default: 0)
    @type remove_underscore: bool
    @keyword make_float: set to 0 if no floats are desired at all. If 1, all
                         entries will be converted to floats and on failure,
                         the string is returned instead

                         (default: 1)
    @type make_float: bool
    @keyword start_index: Start the search for the keyword on the line before
                          this index (ie data are returned from the first
                          non-commented line at or after this index)

                          (default: 1)
    @type start_index: int
    @keyword rindex: Only return the element with this index. By default the
                     full list is returned.

                     (default: None)
    @type rindex: int

    @return: Requested data from the usr input (either list or single element)
    @rtype: list

    """

    keyword = keyword.upper()
    data = [line
            for line in readFile(os.path.join(path,filename),' ')
            if ''.join(line).strip()]
    i = int(start_index)
    while ' '.join(data[i-1]).find(keyword) == -1:
        i += 1
    data_index = [line.strip('#')
                  for line in data[i-1]
                  if line.strip('#')].index(keyword)
    try:
        end_index = i
        while data[end_index][0][0] != '#':
            end_index += 1
    except IndexError:
        end_index = None
    try:
        if not make_float:
            raise ValueError
        else:
            elements = [float(line[data_index])
                        for line in data[i:end_index]
                        if line[0]]
    except ValueError:
        if remove_underscore:
            elements = [line[data_index].replace('_',' ')
                        for line in data[i:end_index]
                        if line[0]]
        else:
            elements = [line[data_index]
                        for line in data[i:end_index]
                        if line[0]]
    if rindex is not None:
        return elements[rindex]
    else:
        return elements
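#-- Illustrative usage sketch of getInputData(). STAR_NAME is the documented
#   default keyword; the DISTANCE keyword and the star name used here are
#   hypothetical.
#   >>> names = getInputData(keyword='STAR_NAME',filename='Star.dat')
#   >>> dist = getInputData(keyword='DISTANCE',filename='Star.dat',\
#                           rindex=names.index('rdor'))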
def readFile(filename,delimiter=None,replace_spaces=1):

    """
    Read a file, and return its content with a delimiter of choice.

    The delimiter allows for lines to be split into a list of substrings.

    @param filename: the full filename of the file to be read
    @type filename: string

    @keyword delimiter: The delimiter; default if strings don't have to be
                        split into a list of substrings

                        (default: None)
    @type delimiter: string
    @keyword replace_spaces: Replace any number of spaces or tabs by just one
                             space. If delimiter == ' ', then replace_spaces
                             is always active.

                             (default: 1)
    @type replace_spaces: bool

    @return: The lines in the file are returned, either as the full line or
             split into substrings, depending on the delimiter
    @rtype: list[string] or list[list[string]]

    """

    if delimiter == ' ':
        replace_spaces = 1
    FILE = open(filename,'r')
    lines = FILE.readlines()
    FILE.close()
    return [line
            for line in splitLines(lines,delimiter,replace_spaces)
            if ' '.join(line)]
def readDict(filename=None,lines=None,delimiter='=',comment_chars=['#'],\
             convert_lists=0,convert_floats=0,convert_ints=0,multi_keys=[],\
             start_row=0,end_row=None,key_modifier=None):

    '''
    Read a file as a dictionary.

    Commented lines and lines without the delimiter are ignored.

    If given keywords are present more than once, the entries for those
    keywords are returned as a list in the dictionary with key equal to the
    keyword. The given keywords are defined by the list multi_keys.

    @keyword filename: the filename of the file to be read. In case of
                       default, lines are required. filename takes precedence
                       over lines.

                       (default: None)
    @type filename: string
    @keyword lines: Skip reading a file, and parse these lines instead.
                    If default, a filename is required. Is a list of strings,
                    each interpreted as a line. Ignored if filename is given.

                    (default: None)
    @type lines: list[str]
    @keyword delimiter: the delimiter defining the key/value pairs

                        (default: '=')
    @type delimiter: string
    @keyword comment_chars: single character strings setting the comment
                            characters

                            (default: ['#'])
    @type comment_chars: list of strings
    @keyword convert_lists: convert strings that include lists to real lists

                            (default: 0)
    @type convert_lists: bool
    @keyword convert_floats: convert input values to floats if possible

                             (default: 0)
    @type convert_floats: bool
    @keyword convert_ints: convert input values to ints if number%1 is 0,
                           ie if the number is whole. Only works if
                           convert_floats == 1 as well.

                           (default: 0)
    @type convert_ints: bool
    @keyword multi_keys: Defines the keywords which may be present multiple
                         times in the dictionary. They are included in the
                         dict as a list even if they are present only once.

                         (default: [])
    @type multi_keys: list(string)
    @keyword start_row: Limit the text file to lines starting from this index.

                        (default: 0)
    @type start_row: int
    @keyword end_row: Limit the text file to lines ending before this index
                      (ie last line is end_index - 1). Default includes up to
                      the last line of the file.

                      (default: None)
    @type end_row: int
    @keyword key_modifier: A function that modifies the key, such as lower().
                           Any method that works on a string in principle
                           works. Give the function as a string.

                           (default: None)
    @type key_modifier: str

    @return: the dictionary with the info from the file.
    @rtype: dict

    '''

    if filename:
        lines = readFile(filename)
    lines = lines[start_row:end_row]
    lines, comments = removeComments(lines,comment_chars=comment_chars)

    #-- Make sure the final character in a value definition doesn't drop off
    #   when splitting the line in case there's no comment character on the
    #   line
    newdict = dict()
    all_keys = [line.split(delimiter,1)[0].strip()
                for line in lines
                if len(line.split(delimiter,1)) == 2]
    all_vals = [line.split(delimiter,1)[1].strip()
                for line in lines
                if len(line.split(delimiter,1)) == 2]

    #-- Apply a key modifier if requested.
    if key_modifier:
        all_keys = [getattr(k,key_modifier)() for k in all_keys]

    for mkey in multi_keys:
        mkey_count = all_keys.count(mkey)
        if mkey_count > 0:
            newdict[mkey] = [v for k,v in zip(all_keys,all_vals) if k == mkey]
    newdict.update(dict([(k,v)
                         for k,v in zip(all_keys,all_vals)
                         if k not in multi_keys]))
    if convert_floats:
        newdict = dict([(k,convertFloat(v,convert_int=convert_ints))
                        for k,v in newdict.items()])
    if convert_lists:
        for k,v in newdict.items():
            if isinstance(v,str) and v.find('[') != -1:
                v = v.strip('[').strip(']')
                newv = []
                while v.find('(') != -1:
                    tu = v[v.find('(')+1:v.find(')')].split(',')
                    tu = tuple([convertString(t) for t in tu])
                    newv.append(len(tu) != 1 and tu or tu[0])
                    v = v[v.find(')')+1:].lstrip(',')
                if not newv:
                    newv = [convertString(t) for t in v.split(',')]
                if newv == ['']:
                    newv = []
                if convert_floats:
                    converted = []
                    for newvi in newv:
                        if isinstance(newvi,tuple):
                            converted.append(tuple([convertFloat(tui,\
                                                      convert_int=convert_ints)
                                                    for tui in newvi]))
                        else:
                            converted.append(convertFloat(newvi,\
                                                      convert_int=convert_ints))
                    newv = converted
                newdict[k] = newv
            elif v == 'None':
                newdict[k] = None

    return newdict
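#-- Illustrative usage sketch of readDict(), assuming a hypothetical file
#   test.dat containing the two lines
#       T_STAR = 2500.
#       STAR_NAME = rdor    ! comment
#   The call below returns {'T_STAR': 2500, 'STAR_NAME': 'rdor'}:
#   >>> dd = readDict(filename='test.dat',convert_floats=1,convert_ints=1,\
#                     comment_chars=['#','!'])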
def convertInt(number):

    '''
    Convert a float to an integer.

    This is only done if float%1 == 0, ie if the number is whole. Otherwise,
    it remains a float.

    @param number: The float/string to be converted.
    @type number: float or string

    @return: The converted integer is returned, or the input float if
             float%1 differs from 0.
    @rtype: float or int

    '''

    return float(number)%1 != 0 and float(number) or int(float(number))
def convertString(string):

    '''
    Convert a string to a string, where 'None' is converted to None.

    @param string: The string to be converted.
    @type string: string

    @return: The new string (identical to input) or None
    @rtype: string

    '''

    if string == 'None':
        return None
    else:
        return string
def convertFloat(string,nans=0,convert_int=0):

    '''
    Convert a string to a float.

    If the string cannot be converted, the string itself or a nan is
    returned.

    It is possible to pass objects other than strings and numbers as 'string'.
    In that case a TypeError for the float conversion is raised, and the
    object is returned untouched. The nan conversion only happens if a
    ValueError is raised, ie if it was indeed a string that does not
    represent a number.

    @param string: The string to be converted.
    @type string: string

    @keyword nans: Convert the string to nan if it cannot be converted to
                   float

                   (default: 0)
    @type nans: bool
    @keyword convert_int: Convert the float to an integer straight away if
                          float(string)%1 == 0

                          (default: 0)
    @type convert_int: bool

    @return: The converted float is returned, or the input string if
             conversion failed
    @rtype: float or string

    '''

    try:
        try:
            if convert_int:
                return convertInt(string)
            else:
                return float(string)
        except TypeError:
            return string
    except ValueError:
        return nans and float('nan') or string
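#-- Illustrative sketch of the conversion helpers defined above:
#   >>> convertFloat('3.0',convert_int=1)    # whole number -> 3
#   >>> convertFloat('abc',nans=1)           # non-numeric string -> nan
#   >>> convertString('None') is None        # -> True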
def inputToString(val,make_int=0,exp_not=0):

    """
    Convert an input value to a string.

    @param val: The input value
    @type val: str, int, float

    @keyword make_int: Turn the input value into an integer

                       (default: 0)
    @type make_int: bool
    @keyword exp_not: Convert to exponential notation in a string

                      (default: 0)
    @type exp_not: bool

    @return: The converted input value
    @rtype: string

    """

    if exp_not:
        return make_int and '%.2e'%(int(float(val))) or '%.2e'%(float(val))
    else:
        return make_int and str(int(float(val))) or str(val)
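#-- Illustrative sketch of inputToString():
#   >>> inputToString(2500.0,make_int=1)     # -> '2500'
#   >>> inputToString(0.000123,exp_not=1)    # -> '1.23e-04'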
def printRecArray(recarr,precision=8):

    """
    Print a record array in a mannerly fashion.

    @param recarr: The record array.
    @type recarr: recarray

    @keyword precision: The precision of the floats shown when printed

                        (default: 8)
    @type precision: int

    """

    print mlab.rec2txt(recarr,precision=precision)
def removeComments(lines,comment_chars=['#','!',';']):

    '''
    Split input from comments and return both as separate lists.

    Takes a list of strings, such as what readFile returns.

    @param lines: The strings in a file
    @type lines: list[str]

    @keyword comment_chars: single character strings setting the comment
                            characters

                            (default: ['#','!',';'])
    @type comment_chars: list[str]

    @return: The input and the comment lines in two separate lists
    @rtype: (list,list)

    '''

    if len(comment_chars) > 1:
        for char in comment_chars[1:]:
            lines = [line.replace(char,comment_chars[0]) for line in lines]
    data = [line.partition(comment_chars[0])[0] for line in lines]
    comments = [line.partition(comment_chars[0])[2] for line in lines]
    return (data,comments)
def readCols(filename,delimiter=' ',make_float=1,start_row=0,make_array=1,\
             nans=0,start_from_keyword='',return_comments=0,\
             comment_chars=['#','!',';'],end_row=None):

    '''
    Read columns, remove comments and turn into floats.

    Note that the number of columns returned is the minimum number of columns
    over all rows, where the columns are split by the chosen delimiter
    (space-like by default).

    @param filename: The full filename and path of the file
    @type filename: string

    @keyword delimiter: delimiter between the columns

                        (default: ' ')
    @type delimiter: string
    @keyword make_float: turn everything into floats

                         (default: 1)
    @type make_float: bool
    @keyword start_row: Limit the text file to lines starting from this row.
                        If start_from_keyword is used, start_row counts from
                        the index where the keyword is first found.

                        (default: 0)
    @type start_row: int
    @keyword make_array: return numpy arrays instead of python lists

                         (default: 1)
    @type make_array: bool
    @keyword nans: convert any non-float input value to a numpy.NaN. If False,
                   the strings are returned instead.

                   (default: 0)
    @type nans: bool
    @keyword start_from_keyword: Start returning data from the line that
                                 contains a given keyword. Only used if not
                                 default. The start_row parameter counts from
                                 this index onward. (not case sensitive)

                                 (default: '')
    @type start_from_keyword: string
    @keyword return_comments: Return the comments list in addition to the data

                              (default: 0)
    @type return_comments: bool
    @keyword comment_chars: single character strings setting the comment
                            characters

                            (default: ['#','!',';'])
    @type comment_chars: list[str]
    @keyword end_row: Limit the text file to lines ending before this index
                      (ie last line is end_index - 1). Default includes up to
                      the last line of the file. If start_from_keyword is
                      given, end_row counts with respect to the found index.

                      (default: None)
    @type end_row: int

    @return: The columns are returned, with in addition the comments if
             requested
    @rtype: list[list or array] or (list[list or array],list[str])

    '''

    lines = readFile(filename)
    if str(start_from_keyword):
        #-- Find occurrences of the search string
        start_from_keyword = start_from_keyword.upper()
        indices = [i for i,line in enumerate(lines)
                   if line.upper().find(start_from_keyword) != -1]
        #-- If any were found, grab the first and cut the lines above it
        if indices: lines = lines[indices[0]:]

    #-- Cut anything above start_row, up to end_row
    lines = lines[start_row:end_row]

    #-- Remove the comments and empty lines, then split the lines
    lines,comments = removeComments(lines,comment_chars=comment_chars)
    lines = [line for line in lines if line]
    lines = splitLines(lines,delimiter=delimiter)
    if not lines:
        print 'WARNING from DataIO.readCols! No numerical data ' + \
              'are available in %s. Returning empty list.'%filename
        return []

    #-- Apply requests for floats and arrays and return.
    if make_float:
        lines = [[convertFloat(l,nans=nans) for l in line] for line in lines]
    ndata = min([len(line) for line in lines])
    if make_array and make_float:
        lines = [array([line[i] for line in lines]) for i in xrange(ndata)]
    else:
        lines = [[line[i] for line in lines] for i in xrange(ndata)]
    return return_comments and (lines,comments) or lines
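#-- Illustrative usage sketch of readCols(), assuming a hypothetical
#   two-column ascii file 'spectrum.dat' with '#' comments:
#   >>> wave, flux = readCols('spectrum.dat',comment_chars=['#'])
#   >>> flux = flux[wave > 10.]    # columns are numpy arrays by default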
def splitLines(lines,delimiter=None,replace_spaces=1):

    """
    Split lines based on a delimiter of choice, which can be None.

    @param lines: The lines to be split up
    @type lines: list[string]

    @keyword delimiter: The delimiter; default if strings don't have to be
                        split into a list of substrings

                        (default: None)
    @type delimiter: string
    @keyword replace_spaces: replace any number of spaces/tabs by just 1 space

                             (default: 1)
    @type replace_spaces: bool

    @return: The lines split up in substrings if delimiter is not None.
             Otherwise, the strings are returned as they are, or with spaces
             replaced, depending on the replace_spaces keyword. Empty lines
             are always removed.
    @rtype: list[string] or list[list[string]]

    """

    #- Make sure a run of spaces/tabs appears as one space only, if
    #  replace_spaces == 1
    if delimiter is not None and replace_spaces:
        return [" ".join(line.split()).split(delimiter)
                for line in lines
                if line]
    elif delimiter is None and replace_spaces:
        return [" ".join(line.split()) for line in lines if line]
    elif delimiter is None and not replace_spaces:
        return [line for line in lines if line]
    elif delimiter is not None and not replace_spaces:
        return [line.split(delimiter) for line in lines if line]
def writeFile(filename,input_lines,mode='w',delimiter='\n'):

    """
    Write file with a list of strings as input.

    @param filename: filename of file, includes data type extension
    @type filename: string
    @param input_lines: The lines to be written
    @type input_lines: list[string]

    @keyword delimiter: The line separator. Typically the new line character,
                        but can be changed if needed (eg empty string in case
                        the new line character is already included in the
                        input lines)

                        (default: '\\n')
    @type delimiter: string
    @keyword mode: writing mode ('w' is new file, 'a' appends to existing
                   file)

                   (default: 'w')
    @type mode: string

    """

    #- Check existence of the folder only if a new file is made
    if mode == 'w':
        testFolderExistence(os.path.split(filename)[0])
    FILE = open(filename,mode)
    FILE.write(delimiter.join(input_lines))
    FILE.close()
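#-- Illustrative sketch combining writeFile() and readFile(); the path used
#   here is arbitrary:
#   >>> writeFile('/tmp/test_lines.dat',['a 1','b 2'])
#   >>> readFile('/tmp/test_lines.dat',delimiter=' ')  # -> [['a','1'],['b','2']]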
def replaceString(filename,old_str,new_str):

    '''
    Replace a given string with another in the requested filename.

    The filename can contain whatever works for the glob function, such as the
    wildcard character, to do multiple files in one go.

    @param filename: The filename, possibly with a wildcard character
    @type filename: string
    @param old_str: The old string to be replaced in all the files
    @type old_str: str
    @param new_str: The new string to be inserted in all the files
    @type new_str: str

    '''

    gg = glob(filename)
    for gf in gg:
        old_lines = readFile(gf,replace_spaces=0)
        new_lines = [ll.replace(old_str,new_str) for ll in old_lines]
        writeFile(filename=gf,input_lines=new_lines,delimiter='')
def writeCols(filename,cols,mode='w',delimiter='\t'):

    """
    Write columns of data.

    @param filename: filename of target file for writing
    @type filename: string
    @param cols: columns to be written, every column given separately in list
    @type cols: list[list or array]

    @keyword mode: writing mode ('w' is new file, 'a' appends to existing
                   file)

                   (default: 'w')
    @type mode: string
    @keyword delimiter: The delimiter used between columns

                        (default: '\t')
    @type delimiter: string

    """

    #- Check existence of the folder only if a new file is made
    if mode == 'w':
        testFolderExistence(os.path.split(filename)[0])
    FILE = open(filename,mode)
    FILE.write('\n'.join([delimiter.join([isinstance(col[i],str) \
                                              and '%s'%col[i] \
                                              or '%.3e'%col[i]
                                          for col in cols])
                          for i in xrange(len(cols[0]))]))
    FILE.close()
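#-- Illustrative sketch of writeCols(), written and read back with readCols();
#   the path used here is arbitrary:
#   >>> writeCols('/tmp/test_cols.dat',[np.arange(5),np.arange(5)**2])
#   >>> x, y = readCols('/tmp/test_cols.dat')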
def findNumber(index,floats):

    """
    Starting from index, find the index of the next number different from
    zero in the list. Can be at index itself!

    Cannot work with non-float or non-convertible-to-float input values.

    If at the end of the list, the index is returned as the length of the
    list.

    @param index: The starting index for the search
    @type index: int
    @param floats: The floats being searched for a non-zero number
    @type floats: list[floats]

    @return: The index of the next number different from zero, can be param
             index itself.
    @rtype: int

    """

    while index < len(floats) and float(floats[index]) == 0.0:
        index += 1
    return index
def findZero(index,floats):

    """
    Starting from index, find the index of the next number equal to zero in
    the list. Can be at index itself!

    If at the end of the list, the index is returned as the length of the
    list.

    @param index: The starting index for the search
    @type index: int
    @param floats: The floats being searched for a zero number
    @type floats: list[floats]

    @return: The index of the next number equal to zero, can be param index
             itself.
    @rtype: int

    """

    while index < len(floats) and float(floats[index]) != 0.0:
        index += 1
    return index
def findFloat(index,vals):

    '''
    Starting from index, find the index of the next float in the list, zero
    or non-zero. Can take strings as input! The goal is to browse until an
    input value convertible to float is found.

    In case no float is found before the end of the file, the length of the
    list is returned.

    If the given index is larger than the length of the list, the length is
    also returned.

    @param index: The starting index in the list
    @type index: int
    @param vals: the list of strings and floats
    @type vals: list

    @return: the index of the next float in the list
    @rtype: int

    '''

    while True:
        if index >= len(vals):
            return len(vals)
        try:
            dummy = float(vals[index].replace('D+','E+').replace('D-','E-'))
            break
        except ValueError:
            index += 1
    return index
def findString(index,vals):

    '''
    Starting from index, find the index of the next string (ie non-float
    entry) in the list.

    In case no string is found before the end of the file, the length of the
    list is returned.

    If the given index is larger than the length of the list, the length is
    also returned.

    @param index: The starting index in the list
    @type index: int
    @param vals: the list of strings and floats
    @type vals: list

    @return: the index of the next string in the list
    @rtype: int

    '''

    while True:
        if index >= len(vals):
            return len(vals)
        try:
            dummy = float(vals[index].replace('D+','E+').replace('D-','E-'))
            index += 1
        except ValueError:
            break
    return index
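#-- Illustrative sketch of the search helpers above on small example lists:
#   >>> vals = ['RADIUS','1.0D+14','2.0D+14','TEMPERATURE']
#   >>> findFloat(0,vals), findString(1,vals)          # -> (1, 3)
#   >>> findZero(0,[3.,1.,0.]), findNumber(0,[0.,2.])  # -> (2, 1)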
def testFolderExistence(filepath):

    """
    Check if the requested folder exists and create it if not.

    @param filepath: the folder name
    @type filepath: string

    """

    if filepath and not os.path.isdir(filepath):
        subprocess.call(['mkdir ' + filepath],shell=True)
        print 'Made directory: ' + filepath
def joinPdf(old,new,del_old=1):

    '''
    Join .pdf files into a single .pdf and remove the separate ones.

    Uses the PyPDF2 package.

    @param old: The input filenames of the .pdf files to be joined
    @type old: list[string]
    @param new: The filename of the joined .pdf file
    @type new: string

    @keyword del_old: delete the old files

                      (default: 1)
    @type del_old: bool

    '''

    pp = PdfFileMerger()
    for ofn in old:
        pp.append(ofn)
    pp.write(new)
    pp.close()
    if bool(del_old):
        subprocess.call([' '.join(['rm']+old)],shell=True)
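#-- Illustrative sketch of joinPdf(); the pdf filenames are hypothetical:
#   >>> joinPdf(old=sorted(glob('page_*.pdf')),new='report.pdf',del_old=0)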
def fillOutSpaces(string,nchars):

    '''
    Fill out a string to a set number of characters with spaces.

    @param string: The string to be filled out; if nchars is < len(string)
                   the string itself is returned, unchanged.
    @type string: str
    @param nchars: The number of characters requested in the final string
    @type nchars: int

    @return: The filled out string.
    @rtype: str

    '''

    string, nchars = str(string), int(nchars)
    if nchars > len(string):
        string += ''.join([' ']*(nchars-len(string)))
    return string
def checkEntryInfo(input_list,number_of_keys,info_type):

    '''
    Specific input keywords for ComboCode (currently: MOLECULE, TRANSITION,
    R_POINTS_MASS_LOSS) require multiple arguments separated by a space. This
    method sorts those arguments and checks if the format is OK.

    The method returns a tuple or a list of these arguments depending on the
    multiplicity of the input, and on the need to create a grid for the
    parameter.

    @param input_list: argument strings for one of the special CC input keys.
    @type input_list: list[string]
    @param number_of_keys: the number of arguments expected
    @type number_of_keys: int
    @param info_type: MOLECULE, TRANSITION or R_POINTS_MASS_LOSS
    @type info_type: string

    @return: the sorted arguments for the requested info_type. The output is
             given as a list or a tuple, depending on whether this is a
             gridded parameter or not, respectively.
    @rtype: list[tuple(list[string])] or tuple(list[string])

    '''

    if not input_list:
        return []

    if not info_type in ('MOLECULE','R_POINTS_MASS_LOSS','TRANSITION'):
        raise KeyError('The info_type keyword is unknown. Should be ' + \
                       'MOLECULE, TRANSITION or R_POINTS_MASS_LOSS.')
    input_list = [line.split() for line in input_list]

    #-- Make sure identical transitions don't get confused if offset 0 is
    #   given in different ways, such as more significant numbers. Offset is
    #   always the entry with index 10.
    def checkOffset(line):
        if float(line[10]) == 0.0:
            line[10] = '0.0'
        return line
    if info_type == 'TRANSITION':
        input_list = [checkOffset(line) for line in input_list]

    if set([len(line) for line in input_list]) != set([number_of_keys]):
        print 'Number of keys should be: %i'%number_of_keys
        print '\n'.join(['%i for %s'%(len(line),line) for line in input_list])
        raise IOError('Input for one of the %s lines has wrong '%info_type + \
                      'number of values. Double check, and abort.')
    else:
        #-- if MOLECULE: only molecule string,
        #   if R_POINTS_MASS_LOSS: only grid id number,
        #   if TRANSITION: everything except last entry (n_quad)
        entries = [info_type in ('MOLECULE','R_POINTS_MASS_LOSS') \
                        and line[0] or ' '.join(line[0:-1])
                   for line in input_list]
        unique_entries = set(entries)
        #-- ie the defining parameter is never multiply defined!
        #   Hence, only a single set of parameters is used here.
        if len(unique_entries) == len(entries):
            if info_type == 'R_POINTS_MASS_LOSS':
                input_list = [' '.join(il[1:]) for il in input_list]
            return tuple(input_list)
        else:
            if info_type == 'TRANSITION':
                indices = [i
                           for i,entry in enumerate(entries)
                           if entries.count(entry) > 1]
                print 'Identical transition(s) in the transition list: Doubl'+\
                      'es will not be removed even if N_QUAD is the same too!'
                for i in indices:
                    print 'At index %i: %s' %(i,entries[i])
                raw_input('Abort if identical transitions are not expected. '+\
                          'Press enter otherwise.')
            if info_type == 'R_POINTS_MASS_LOSS':
                #-- This will be a list of R_POINTS_MASS_LOSS sets, where each
                #   set is defined as a list of radial grid point parameters
                final = []
                while input_list:
                    if int(input_list[0][0]) != 1:
                        raise IOError('The grid point ID numbers for the ' + \
                                      'R_POINTS_MASS_LOSS keywords do not ' + \
                                      'follow a correct order. Use ID = 1 ' + \
                                      'as a reset for new set of grid points.')
                    final.append([input_list.pop(0)])
                    while input_list and int(input_list[0][0]) != 1:
                        final[-1].append(input_list.pop(0))
                #-- Remove the grid point ID numbers, not relevant anymore;
                #   put the rest back into a string
                final = [tuple([' '.join([num
                                          for i,num in enumerate(this_point)
                                          if i != 0])
                                for this_point in this_set])
                         for this_set in final]
                return final
            final = [[line
                      for line,entry in zip(input_list,entries)
                      if entries.count(entry) == 1]]
            multiples = set([entry
                             for entry in entries
                             if entries.count(entry) != 1])
            for entry in multiples:
                this_input = [line
                              for this_entry,line in zip(entries,input_list)
                              if this_entry == entry]
                final = [this_list + [extra_line]
                         for extra_line in this_input
                         for this_list in final]
            return [tuple(final_list) for final_list in final]
def getChemistryAbundances(filename):

    '''
    Reads in the Chemistry abundance output. Works for both fractional
    abundances and number densities.

    @param filename: The filename of the abundance output
                     (csfrac.out or csnum.out)
    @type filename: string

    @return: Record array containing the abundance per species name.
    @rtype: recarray

    '''

    ###- Open file, read in all columns
    ##data = readCols(filename,start_row=1)
    ###- Join them into one output array
    ##C = []
    ##for c in data[1:]:
        ##C = np.concatenate((C,c),axis = 0)

    #- Open file, read in all lines
    f = open(filename, 'r')
    lines = f.read().splitlines()
    f.close()

    #- Remove first line, empty strings within lists, and empty lists
    data = [filter(None, line.split(' ')) for line in lines[1:]]
    data = filter(None, data)

    #- Number of columns is constant throughout the file
    if len(set([len(d) for d in data])) == 1:
        data = readCols(filename,start_row=1)

        #- Join them into one output array
        C = []
        for c in data[1:]:
            C = np.concatenate((C,c),axis = 0)

        #- Number of calculations per species
        c0 = data[0]
        L = np.where(np.array(c0) == c0[0])[0][1]+1

    #- If the number of columns varies throughout the file (e.g. by adding
    #  species), run a more elaborate method to read in the columns
    else:
        #- Read in first block of 10 columns and concatenate
        limit = [i for i,d in enumerate(data) if len(d) != 10][0]
        blok = zip(*data[:limit])
        C = []
        for c in blok[1:]:
            C = np.concatenate((C,c),axis = 0)

        #- Add the appendix (with fewer than 10 columns)
        app = zip(*data[limit:])
        for c in app[1:]:
            C = np.concatenate((C,c),axis = 0)

        #- Number of calculations per species
        c0 = blok[0]
        L = np.where(np.array(c0) == c0[0])[0][1]+1

    #- Put the names of the species in an array
    names = []
    for ii in range(len(C)):
        if ii%L == 0:
            names.append(C[ii])
    N = len(names)

    #- Radii of calculation
    radius = np.array(c0[1:L-1])
    radius = radius.astype(np.float)

    #- Output array: [species,[output]]
    species = np.recarray(shape = [L-2,], dtype = zip(names, [float]*N))
    for ii in range(N):
        species[names[ii]] = C[(ii*L)+1:((ii+1)*L)-1].astype(float)

    return species
def getChemistryPhysPar(filename, keyword):

    '''
    Reads in the Chemistry physical output.

    @param filename: The filename of the physical-parameter output
                     (csphyspar.out)
    @type filename: string
    @param keyword: The physical parameter in question. Options are:
                    RADIUS, n(H2), TEMP, A_V, RAD. FIELD,
                    CO K(PHOT), VELOCITY
    @type keyword: string

    @return: The values of the requested physical parameter.
    @rtype: list[float]

    '''

    #- Open file, read in all columns
    data = readCols(filename,start_row=1)

    #- Initialise keyword
    keyword = keyword.upper()

    #- Select the right column
    c = [i for i,s in enumerate(data) if keyword in s[0]][0]
    par = [float(p) for p in data[c][1:]]

    return par
def getChemistrySpecies(filename,parents=1):

    '''
    Reads the species and parent species included in the Chemistry code.

    @param filename: The .specs file
    @type filename: string

    @keyword parents: Give parent species as output.
                      If parents=0, all species are given.

                      (default: 1)
    @type parents: bool

    @return: (Parent) species included in the Chemistry code.
    @rtype: list

    '''

    #- Read in the file
    data = readFile(filename)
    data = [x.split() for x in data]

    #- Determine the different sections in the species file
    separators = np.where([len(i)==2 for i in data])[0]

    if parents:
        #- Select the parent species
        species = data[separators[2]:]
        species = [species[x][0] for x in range(len(species))]
    else:
        #- Select all species included
        species = data[1:separators[0]]
        species = [species[x][1] for x in range(len(species))]

    return species