#! /usr/bin/env python """Zip together photometry measurements for matching astronomical objects. Matching is done based on positions. Inputs are TBL photometry output files from APT. Output is a CSV file zipping together all the photometry from the given bands. Photometry is converted to magnitudes per algorithms provided by Varoujan Gorjian and the Spitzer calibration pages. Author: Mark Abajian (626) 395-1811 Infrared Processing and Analysis Center California Institute of Technology Pasadena, California Date: September 14, 2011 """ # The following code was taken from http://dev.pocoo.org/hg/sandbox/raw-file/tip/odict.py # -*- coding: utf-8 -*- """ odict ~~~~~ This module is an example implementation of an ordered dict for the collections module. It's not written for performance (it actually performs pretty bad) but to show how the API works. Questions and Answers ===================== Why would anyone need ordered dicts? Dicts in python are unordered which means that the order of items when iterating over dicts is undefined. As a matter of fact it is most of the time useless and differs from implementation to implementation. Many developers stumble upon that problem sooner or later when comparing the output of doctests which often does not match the order the developer thought it would. Also XML systems such as Genshi have their problems with unordered dicts as the input and output ordering of tag attributes is often mixed up because the ordering is lost when converting the data into a dict. Switching to lists is often not possible because the complexity of a lookup is too high. Another very common case is metaprogramming. The default namespace of a class in python is a dict. With Python 3 it becomes possible to replace it with a different object which could be an ordered dict. Django is already doing something similar with a hack that assigns numbers to some descriptors initialized in the class body of a specific subclass to restore the ordering after class creation. When porting code from programming languages such as PHP and Ruby where the item-order in a dict is guaranteed it's also a great help to have an equivalent data structure in Python to ease the transition. Where are new keys added? At the end. This behavior is consistent with Ruby 1.9 Hashmaps and PHP Arrays. It also matches what common ordered dict implementations do currently. What happens if an existing key is reassigned? The key is *not* moved. This is consitent with existing implementations and can be changed by a subclass very easily:: class movingodict(odict): def __setitem__(self, key, value): self.pop(key, None) odict.__setitem__(self, key, value) Moving keys to the end of a ordered dict on reassignment is not very useful for most applications. Does it mean the dict keys are sorted by a sort expression? That's not the case. The odict only guarantees that there is an order and that newly inserted keys are inserted at the end of the dict. If you want to sort it you can do so, but newly added keys are again added at the end of the dict. I initializes the odict with a dict literal but the keys are not ordered like they should! Dict literals in Python generate dict objects and as such the order of their items is not guaranteed. Before they are passed to the odict constructor they are already unordered. What happens if keys appear multiple times in the list passed to the constructor? The same as for the dict. The latter item overrides the former. This has the side-effect that the position of the first key is used because the key is actually overwritten: >>> odict([('a', 1), ('b', 2), ('a', 3)]) odict.odict([('a', 3), ('b', 2)]) This behavor is consistent with existing implementation in Python and the PHP array and the hashmap in Ruby 1.9. This odict doesn't scale! Yes it doesn't. The delitem operation is O(n). This is file is a mockup of a real odict that could be implemented for collections based on an linked list. Why is there no .insert()? There are few situations where you really want to insert a key at an specified index. To now make the API too complex the proposed solution for this situation is creating a list of items, manipulating that and converting it back into an odict: >>> d = odict([('a', 42), ('b', 23), ('c', 19)]) >>> l = d.items() >>> l.insert(1, ('x', 0)) >>> odict(l) odict.odict([('a', 42), ('x', 0), ('b', 23), ('c', 19)]) :copyright: (c) 2008 by Armin Ronacher and PEP 273 authors. :license: modified BSD license. """ from itertools import izip, imap from copy import deepcopy missing = object() class odict(dict): """ Ordered dict example implementation. This is the proposed interface for a an ordered dict as proposed on the Python mailinglist (proposal_). It's a dict subclass and provides some list functions. The implementation of this class is inspired by the implementation of Babel but incorporates some ideas from the `ordereddict`_ and Django's ordered dict. The constructor and `update()` both accept iterables of tuples as well as mappings: >>> d = odict([('a', 'b'), ('c', 'd')]) >>> d.update({'foo': 'bar'}) >>> d odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')]) Keep in mind that when updating from dict-literals the order is not preserved as these dicts are unsorted! You can copy an odict like a dict by using the constructor, `copy.copy` or the `copy` method and make deep copies with `copy.deepcopy`: >>> from copy import copy, deepcopy >>> copy(d) odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')]) >>> d.copy() odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')]) >>> odict(d) odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')]) >>> d['spam'] = [] >>> d2 = deepcopy(d) >>> d2['spam'].append('eggs') >>> d odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])]) >>> d2 odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', ['eggs'])]) All iteration methods as well as `keys`, `values` and `items` return the values ordered by the the time the key-value pair is inserted: >>> d.keys() ['a', 'c', 'foo', 'spam'] >>> d.values() ['b', 'd', 'bar', []] >>> d.items() [('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])] >>> list(d.iterkeys()) ['a', 'c', 'foo', 'spam'] >>> list(d.itervalues()) ['b', 'd', 'bar', []] >>> list(d.iteritems()) [('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])] Index based lookup is supported too by `byindex` which returns the key/value pair for an index: >>> d.byindex(2) ('foo', 'bar') You can reverse the odict as well: >>> d.reverse() >>> d odict.odict([('spam', []), ('foo', 'bar'), ('c', 'd'), ('a', 'b')]) And sort it like a list: >>> d.sort(key=lambda x: x[0].lower()) >>> d odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])]) .. _proposal: http://thread.gmane.org/gmane.comp.python.devel/95316 .. _ordereddict: http://www.xs4all.nl/~anthon/Python/ordereddict/ """ def __init__(self, *args, **kwargs): dict.__init__(self) self._keys = [] self.update(*args, **kwargs) def __delitem__(self, key): dict.__delitem__(self, key) self._keys.remove(key) def __setitem__(self, key, item): if key not in self: self._keys.append(key) dict.__setitem__(self, key, item) def __deepcopy__(self, memo=None): if memo is None: memo = {} d = memo.get(id(self), missing) if d is not missing: return d memo[id(self)] = d = self.__class__() dict.__init__(d, deepcopy(self.items(), memo)) d._keys = self._keys[:] return d def __getstate__(self): return {'items': dict(self), 'keys': self._keys} def __setstate__(self, d): self._keys = d['keys'] dict.update(d['items']) def __reversed__(self): return reversed(self._keys) def __eq__(self, other): if isinstance(other, odict): if not dict.__eq__(self, other): return False return self.items() == other.items() return dict.__eq__(self, other) def __ne__(self, other): return not self.__eq__(other) def __cmp__(self, other): if isinstance(other, odict): return cmp(self.items(), other.items()) elif isinstance(other, dict): return dict.__cmp__(self, other) return NotImplemented @classmethod def fromkeys(cls, iterable, default=None): return cls((key, default) for key in iterable) def clear(self): del self._keys[:] dict.clear(self) def copy(self): return self.__class__(self) def items(self): return zip(self._keys, self.values()) def iteritems(self): return izip(self._keys, self.itervalues()) def keys(self): return self._keys[:] def iterkeys(self): return iter(self._keys) def pop(self, key, default=missing): if default is missing: return dict.pop(self, key) elif key not in self: return default self._keys.remove(key) return dict.pop(self, key, default) def popitem(self, key): self._keys.remove(key) return dict.popitem(key) def setdefault(self, key, default=None): if key not in self: self._keys.append(key) dict.setdefault(self, key, default) def update(self, *args, **kwargs): sources = [] if len(args) == 1: if hasattr(args[0], 'iteritems'): sources.append(args[0].iteritems()) else: sources.append(iter(args[0])) elif args: raise TypeError('expected at most one positional argument') if kwargs: sources.append(kwargs.iteritems()) for iterable in sources: for key, val in iterable: self[key] = val def values(self): return map(self.get, self._keys) def itervalues(self): return imap(self.get, self._keys) def index(self, item): return self._keys.index(item) def byindex(self, item): key = self._keys[item] return (key, dict.__getitem__(self, key)) def reverse(self): self._keys.reverse() def sort(self, *args, **kwargs): self._keys.sort(*args, **kwargs) def __repr__(self): return 'odict.odict(%r)' % self.items() __copy__ = copy __iter__ = iterkeys #if __name__ == '__main__': # import doctest # doctest.testmod() # The previous code was taken from http://dev.pocoo.org/hg/sandbox/raw-file/tip/odict.py ############################################################################## # # # The following code was developed for NITARP Team Red Shift by Mark Abajian # # September 14, 2011 # # # ############################################################################## from decimal import Decimal import sys COMMA = ',' EMPTY = '' NEWLINE = '\n' TAB = '\t' # Spitzer IRAC flux zero points, in Janskys IRAC_FLUX_ZEROPOINT = [ Decimal('280.9'), Decimal('179.7'), Decimal('115.0'), Decimal('64.13') ] MIN_SIGNAL_TO_NOISE = 3.0 def main(): # Report error conditions if len(sys.argv) < 3: print 'You need to supply at least two file names.' sys.exit(1) if len(sys.argv) > 5: print 'You need to supply fewer than five file names.' sys.exit(1) # Determine files to be combined filenames = sys.argv[1:] numfiles = len(filenames) # Determine which channels these represent channel_ids = [] for file in filenames: if file.endswith('.tbl') and file[-5].isdigit(): channel_ids.append( int(file[-5]) ) else: print 'This program only handles files named _irac[1234].tbl.' sys.exit(1) channels = [] # Loop through list of files # We are assuming that this is a standard ASCII tbl file # output from APT v1.1.1 and above. We _could_ read # the headers to determine the units, etc., but instead, # we are assuming the following fields and units: # RA [decimal degrees] in position 0 # Dec [decimal degrees] in position 1 # Flux [microJanskys] in position 9 # Flux Uncertainty [microJanskys] in position 10 for file in filenames: print '\'{0}\''.format( file ) , # fetch the flux zero point for this channel in microJanskys flux_0 = IRAC_FLUX_ZEROPOINT[ channel_ids[ filenames.index(file) ] - 1 ] * Decimal(1000000) # Initialize the dictionary of magnitudes # The positions will be the keys magnitudes = odict() # Read in the file with open(file) as f: numrec = 0 numblank = 0 numcomment = 0 numheader = 0 numdata = 0 for line in f: numrec += 1 if line.strip() == EMPTY: # Ignore blank lines numblank += 1 continue elif line[0].isupper(): # Ignore comments numcomment += 1 continue elif line.split()[0].upper() == 'RA': # Examine header line numheader += 1 header_tokens = [ token.upper() for token in line.split() ] if header_tokens[1] != 'DEC' or header_tokens[9] != 'SOURCE_INTENSITY' or header_tokens[10] != 'SOURCE_UNC': # Unexpected token print 'Unexpected header line in file \'{0}\'.'.format( file ) sys.exit(1) continue elif line.strip()[0].isdigit(): # This is a data line numdata += 1 data_tokens = line.split() ra = data_tokens[0] dec = data_tokens[1] flux = data_tokens[9] flux_unc = data_tokens[10] position = ra+COMMA+dec if position in magnitudes: # This position has already been seen for this channel. Error. print 'Position \'{0} {1}\' has already been seen in this band. Quitting.'.format( ra, dec ) sys.exit(1) # Before we convert this flux to magnitude, it needs to pass our SNR criterion fluxd = Decimal( flux ) flux_uncd = Decimal( flux_unc ) if (fluxd/flux_uncd) <= MIN_SIGNAL_TO_NOISE: # Skip this object continue magnitude = -Decimal(5)/Decimal(2)*(fluxd/flux_0).log10() mag_unc = -Decimal(5)/Decimal(2)*((fluxd-flux_uncd)/fluxd).log10() magnitudes[ position ] = '{0:.4f}{1}{2:.5f}'.format( magnitude, COMMA, mag_unc ) else: # Unknown record type print 'Unable to parse line from file \'{0}\':'.format( file ) print '\'{0}\''.format( line ) sys.exit(1) #print 'numrec', numrec #print 'numblank', numblank #print 'numcomment', numcomment #print 'numheader', numheader #print 'numdata', numdata print ' {0} data records with {1} valid fluxes'.format( numdata, len(magnitudes) ) channels.append( magnitudes ) #print 'There are {0} positions recorded.'.format( len(magnitudes) ) #matches = [ key for key in magnitudes if len(magnitudes[key]) == numfiles ] #print 'band 1 has',len(channels[0]), 'keys, band 2 has', len(channels[1]), 'keys' matches = [ position for position in channels[0].keys() if position in channels[1].keys() ] for channel in channels[2:]: matches = [ position for position in matches if position in channel ] # matches = set( channels[0].keys() ).intersection( set( channels[1].keys() ) ) # for channel in channels[2:]: # matches = matches.intersection( set( channel.keys() ) ) print 'There were {0} matches.'.format( len(matches) ) #print matches #print magnitudes channel_headers = [ 'IRAC{0} Magnitude [{1}]{2}IRAC{0} Magnitude Unc [{1}]'.format( channel_id, 'mag', COMMA ) for channel_id in channel_ids ] header_record = COMMA.join( [ 'RA J2000 [degrees]', 'Dec J2000 [degrees]' ] + channel_headers ) with open( 'unified.out', 'w' ) as f: f.write( header_record+NEWLINE ) for position in matches: magnitudes = COMMA.join( [ channel[position] for channel in channels ] ) f.write( position+COMMA+magnitudes+NEWLINE ) if __name__ == '__main__': main()