
""" This file implements a Table class
    that is designed to be the basis of any format

Requirements
------------

* FITS format:
    * astropy:
        provides a replacement to pyfits
        pyfits can still be used instead but astropy is now the default

* HDF5 format:
    * pytables

RuntimeError will be raised when reading or writing a format whose required
package is missing.


.. code-block:: python

    >>> t = SimpleTable('path/mytable.csv')
    # get a subset of columns only
    >>> s = t.get('M_* logTe logLo U B V I J K')
    # set some aliases
    >>> t.set_alias('logT', 'logTe')
    >>> t.set_alias('logL', 'logLo')
    # make a query on one or multiple columns
    >>> q = s.selectWhere('logT logL', '(J > 2) & (10 ** logT > 5000)')
    # q is also a table object
    >>> q.plot('logT', 'logL', ',')
    # makes a simple plot
    >>> s.write('newtable.fits')
    # export the initial subtable to a new file
"""
from __future__ import (absolute_import, division, print_function)

__version__ = '3.0'
__all__ = ['AstroHelpers', 'AstroTable', 'SimpleTable', 'stats']

import sys
import math
from copy import deepcopy
import re
import itertools
from functools import wraps, partial
import numpy as np
from numpy import deg2rad, rad2deg, sin, cos, sqrt, arcsin, arctan2
from numpy.lib import recfunctions
import types

try:
    from astropy.io import fits as pyfits
except ImportError:
    try:
        import pyfits
    except ImportError:
        pyfits = None

try:
    import tables
except ImportError:
    tables = None

try:
    import pandas as _pd
except ImportError:
    _pd = None


# ==============================================================================
# Python 3 compatibility behavior
# ==============================================================================
# remap some python 2 built-ins on to py3k behavior or equivalent
# Most of them become generators
import operator

PY3 = sys.version_info[0] > 2

if PY3:
    iteritems = operator.methodcaller('items')
    itervalues = operator.methodcaller('values')
    basestring = (str, bytes)
else:
    range = xrange
    from itertools import izip as zip
    iteritems = operator.methodcaller('iteritems')
    itervalues = operator.methodcaller('itervalues')
    basestring = (str, unicode)


# ==============================================================================
# Specials -- special functions
# ==============================================================================

def pretty_size_print(num_bytes):
    """
    Output number of bytes in a human readable format

    keywords
    --------
    num_bytes: int
        number of bytes to convert

    returns
    -------
    output: str
        string representation of the size with appropriate unit scale
    """
    if num_bytes is None:
        return

    KiB = 1024
    MiB = KiB * KiB
    GiB = KiB * MiB
    TiB = KiB * GiB
    PiB = KiB * TiB
    EiB = KiB * PiB
    ZiB = KiB * EiB
    YiB = KiB * ZiB

    if num_bytes > YiB:
        output = '%.3g YB' % (num_bytes / YiB)
    elif num_bytes > ZiB:
        output = '%.3g ZB' % (num_bytes / ZiB)
    elif num_bytes > EiB:
        output = '%.3g EB' % (num_bytes / EiB)
    elif num_bytes > PiB:
        output = '%.3g PB' % (num_bytes / PiB)
    elif num_bytes > TiB:
        output = '%.3g TB' % (num_bytes / TiB)
    elif num_bytes > GiB:
        output = '%.3g GB' % (num_bytes / GiB)
    elif num_bytes > MiB:
        output = '%.3g MB' % (num_bytes / MiB)
    elif num_bytes > KiB:
        output = '%.3g KB' % (num_bytes / KiB)
    else:
        output = '%.3g Bytes' % (num_bytes)

    return output
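
# Example (illustrative):
#
# >>> pretty_size_print(123456789)
# '118 MB'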


def _fits_read_header(hdr):
    """
    Convert pyfits header into dictionary with relevant values

    Parameters
    ----------

    hdr: pyfits.Header
        fits header unit

    Returns
    -------
    header: dict
        header dictionary

    alias: dict
        aliases

    units: dict
        units

    comments: dict
        comments/description of keywords
    """
    header = {}
    alias = {}
    units = {}
    comments = {}

    # generic cards
    genTerms = ['XTENSION', 'BITPIX', 'NAXIS', 'NAXIS1',
                'NAXIS2', 'PCOUNT', 'GCOUNT', 'TFIELDS',
                'EXTNAME']
    fieldTerms = ['TTYPE', 'TFORM', 'TUNIT', 'ALIAS']

    # read col comments
    for k, name, comment in hdr.ascard['TTYPE*']:
        comments[name] = comment
        u = hdr.get(k.replace('TYPE', 'UNIT'), None)
        if u is not None:
            units[name] = u

    for k, val, _ in hdr.ascard['ALIAS*']:
        al, orig = val.split('=')
        alias[al] = orig

    # other specific keywords: COMMENT, HISTORY
    header_comments = []
    header_history = []
    for k, v in hdr.items():
        if (k not in genTerms) and (k[:5] not in fieldTerms):
            if (k == 'COMMENT'):
                header_comments.append(v)
            elif (k == 'HISTORY'):
                header_history.append(v)
            else:
                header[k] = v

    # COMMENT, HISTORY polish
    if len(header_comments) > 0:
        header['COMMENT'] = '\n'.join(header_comments)
    if len(header_history) > 0:
        header['HISTORY'] = '\n'.join(header_history)

    if 'EXTNAME' in hdr:
        header['NAME'] = hdr['EXTNAME']

    return header, alias, units, comments
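
# Usage sketch ('mytable.fits' is a hypothetical file; note that this parser
# relies on the legacy ``Header.ascard`` interface, so it assumes a pyfits or
# older astropy version that still exposes it):
#
# >>> hdr = pyfits.getheader('mytable.fits', 1)
# >>> header, alias, units, comments = _fits_read_header(hdr)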


def _fits_generate_header(tab):
    """ Generate the corresponding fits Header that contains all necessary info

    Parameters
    ----------

    tab: SimpleTable instance
        table

    Returns
    -------
    hdr: pyfits.Header
        header instance
    """
    # get column cards

    cards = []

    # names units and comments
    for e, k in enumerate(tab.keys()):
        cards.append(('TTYPE{0:d}'.format(e + 1), k, tab._desc.get(k, '')))
        u = tab._units.get(k, '')
        if u not in ['', 'None', None]:
            cards.append(('TUNIT{0:d}'.format(e + 1), tab._units.get(k, ''),
                          'unit of {0:s}'.format(k)))

    # add aliases
    for e, v in enumerate(tab._aliases.items()):
        cards.append( ('ALIAS{0:d}'.format(e + 1), '='.join(v), '') )

    if tab.header['NAME'] not in ['', 'None', None, 'No Name']:
        cards.append(('EXTNAME', tab.header['NAME'], ''))

    hdr = pyfits.Header(cards)

    for k, v in tab.header.items():
        if (v not in ['', 'None', None]) & (k != 'NAME'):
            if (k != 'COMMENT') & (k != 'HISTORY'):
                hdr.update(k, v)
            else:
                txt = v.split('\n')
                for j in txt:
                    if k == 'COMMENT':
                        hdr.add_comment(j)
                    elif k == 'HISTORY':
                        hdr.add_history(j)
    return hdr


def _fits_writeto(filename, data, header=None, output_verify='exception',
                  clobber=False, checksum=False):
    """
    Create a new FITS file using the supplied data/header.
    Patched version of pyfits to correctly include provided header

    Parameters
    ----------
    filename : file path, file object, or file like object
        File to write to.  If opened, must be opened in a writeable binary
        mode such as 'wb' or 'ab+'.

    data : array, record array, or groups data object
        data to write to the new file

    header : `Header` object, optional
        the header associated with ``data``. If `None`, a header
        of the appropriate type is created for the supplied data. This
        argument is optional.

    output_verify : str
        Output verification option.  Must be one of ``"fix"``, ``"silentfix"``,
        ``"ignore"``, ``"warn"``, or ``"exception"``.  May also be any
        combination of ``"fix"`` or ``"silentfix"`` with ``"+ignore"``,
        ``"+warn"``, or ``"+exception"`` (e.g. ``"fix+warn"``).  See
        :ref:`verify` for more info.

    clobber : bool, optional
        If `True`, and if filename already exists, it will overwrite
        the file.  Default is `False`.

    checksum : bool, optional
        If `True`, adds both ``DATASUM`` and ``CHECKSUM`` cards to the
        headers of all HDU's written to the file
    """

    hdu = pyfits.convenience._makehdu(data, header)
    hdu.header.update(header.cards)
    if hdu.is_image and not isinstance(hdu, pyfits.PrimaryHDU):
        hdu = pyfits.PrimaryHDU(data, header=header)
    hdu.writeto(filename, clobber=clobber, output_verify=output_verify,
                checksum=checksum)


def _fits_append(filename, data, header=None, checksum=False, verify=True,
                 **kwargs):
    """
    Append the header/data to FITS file if filename exists, create if not.

    If only ``data`` is supplied, a minimal header is created.
    Patched version of pyfits to correctly include provided header

    Parameters
    ----------
    filename : file path, file object, or file like object
        File to write to.  If opened, must be opened for update (rb+) unless it
        is a new file, then it must be opened for append (ab+).  A file or
        `~gzip.GzipFile` object opened for update will be closed after return.

    data : array, table, or group data object
        the new data used for appending

    header : `Header` object, optional
        The header associated with ``data``.  If `None`, an appropriate header
        will be created for the data object supplied.

    checksum : bool, optional
        When `True` adds both ``DATASUM`` and ``CHECKSUM`` cards to the header
        of the HDU when written to the file.

    verify : bool, optional
        When `True`, the existing FITS file will be read in to verify it for
        correctness before appending.  When `False`, content is simply appended
        to the end of the file.  Setting ``verify`` to `False` can be much
        faster.

    kwargs
        Any additional keyword arguments to be passed to
        `astropy.io.fits.open`.
    """

    name, closed, noexist_or_empty = pyfits.convenience._stat_filename_or_fileobj(filename)

    if noexist_or_empty:
        #
        # The input file or file like object either doesn't exist or is
        # empty.  Use the writeto convenience function to write the
        # output to the empty object.
        #
        _fits_writeto(filename, data, header, checksum=checksum, **kwargs)
    else:
        hdu = pyfits.convenience._makehdu(data, header)
        hdu.header.update(header.cards)

        if isinstance(hdu, pyfits.PrimaryHDU):
            hdu = pyfits.ImageHDU(data, header)

        if verify or not closed:
            f = pyfits.convenience.fitsopen(filename, mode='append')
            f.append(hdu)

            # Set a flag in the HDU so that only this HDU gets a checksum when
            # writing the file.
            hdu._output_checksum = checksum
            f.close(closed=closed)
        else:
            f = pyfits.convenience._File(filename, mode='append')
            hdu._output_checksum = checksum
            hdu._writeto(f)
            f.close()


def _ascii_read_header(fname, comments='#', delimiter=None, commentedHeader=True,
                       *args, **kwargs):
    """
    Read ASCII/CSV header

    Parameters
    ----------
    fname: str or stream
        File, filename, or generator to read.
        Note that generators should return byte strings for Python 3k.

    comments: str, optional
        The character used to indicate the start of a comment;
        default: '#'.

    delimiter: str, optional
        The string used to separate values.  By default, this is any
        whitespace.

    commentedHeader: bool, optional
        if set, the last line of the header is expected to be the column titles

    Returns
    -------
    nlines: int
        number of lines from the header

    header: dict
        header dictionary

    alias: dict
        aliases

    units: dict
        units

    comments: dict
        comments/description of keywords

    names: sequence
        sequence or str, first data line after header, expected to be the column
        names.
    """
    if hasattr(fname, 'read'):
        stream = fname
    else:
        stream = open(fname, 'r')

    header = {}
    alias = {}
    units = {}
    desc = {}

    def parseStrNone(v):
        """ robust parse """
        _v = v.split()
        if (len(_v) == 0):
            return None
        else:
            _v = ' '.join(_v)
            if (_v.lower()) == 'none' or (_v.lower() == 'null'):
                return None
            else:
                return _v

    done = False
    oldline = None
    lasthdr = None
    nlines = 0
    header.setdefault('COMMENT', '')
    header.setdefault('HISTORY', '')
    while done is False:
        line = stream.readline()[:-1]  # getting rid of '\n'
        nlines += 1
        if (line[0] == comments):  # header part
            if (len(line) > 2):
                if line[1] == comments:  # column meta data
                    # column meta is expected to start with ##
                    k = line[2:].split('\t')
                    colname = k[0].strip()
                    colunit = None
                    colcomm = None
                    if len(k) > 1:
                        colunit = parseStrNone(k[1])
                    if len(k) > 2:
                        colcomm = parseStrNone(k[2])

                    if colunit is not None:
                        units[colname] = colunit
                    if colcomm is not None:
                        desc[colname] = colcomm
                else:
                    # header is expected as "# key \t value"
                    k = line[1:].split('\t')
                    if len(k) > 1:
                        key = k[0].strip()  # remove trailing spaces
                        val = ' '.join(k[1:]).strip()

                        if key in ('', None, 'None', 'NONE', 'COMMENT'):
                            header['COMMENT'] = header['COMMENT'] + '\n' + val
                        elif key in ('HISTORY', ):
                            header['HISTORY'] = header['HISTORY'] + '\n' + val
                        elif 'alias' in key.lower():
                            # take care of aliases
                            al, orig = val.split('=')
                            alias[al] = orig
                        else:
                            header[key] = val
                        lasthdr = key
                    else:
                        header['COMMENT'] = header['COMMENT'] + '\n' + line[1:]
        else:
            done = True
            if commentedHeader and (oldline is not None):
                names = oldline.split(delimiter)
                nlines -= 1
                if lasthdr == names[0]:
                    header.pop(lasthdr)
            else:
                names = line.split(delimiter)
        oldline = line[1:]

    if not hasattr(fname, 'read'):
        stream.close()
    else:
        stream.seek(stream.tell() - len(line))
        nlines = 0  # make sure the value is set to the current position

    return nlines, header, units, desc, alias, names
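
# Header convention sketch understood by this reader (hypothetical content of
# 'mytable.dat'; fields are tab-separated, shown here as \t, and with
# commentedHeader=True the last commented line carries the column names):
#
#   # NAME\tmytable
#   ## RA\tdeg\tright ascension
#   # RA DEC
#   1.0 2.0
#
# >>> nlines, header, units, desc, alias, names = _ascii_read_header('mytable.dat')
# >>> names
# ['RA', 'DEC']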


def _hdf5_write_data(filename, data, tablename=None, mode='w', append=False,
                     header={}, units={}, comments={}, aliases={}, **kwargs):
    """ Write table into HDF format

    Parameters
    ----------
    filename : file path, or tables.File instance
        File to write to.  If opened, must be opened and writable (mode='w' or 'a')

    data: recarray
        data to write to the new file

    tablename: str
        path of the node including table's name

    mode: str
        in ('w', 'a') mode to open the file

    append: bool
        if set, append the data to the table if it already exists

    header: dict
        table header

    units: dict
        dictionary of units

    aliases: dict
        aliases

    comments: dict
        comments/description of keywords

    .. note::
        other keywords are forwarded to :func:`tables.openFile`
    """

    if hasattr(filename, 'read'):
        raise Exception("HDF backend does not implement stream")

    if append is True:
        mode = 'a'
    silent = kwargs.pop('silent', False)

    if isinstance(filename, tables.File):
        if (filename.mode != mode) & (mode != 'r'):
            raise tables.FileModeError('The file is already opened in a different mode')
        hd5 = filename
    else:
        hd5 = tables.openFile(filename, mode=mode)

    # check table name and path
    tablename = tablename or header.get('NAME', None)
    if tablename in ('', None, 'Noname', 'None'):
        tablename = '/data'

    w = tablename.split('/')
    where = '/'.join(w[:-1])
    name = w[-1]
    if where in ('', None):
        where = '/'
    if where[0] != '/':
        where = '/' + where

    if append:
        try:
            t = hd5.getNode(where + name)
            t.append(data.astype(t.description._v_dtype))
            t.flush()
        except tables.NoSuchNodeError:
            if not silent:
                print(("Warning: Table {0} does not exists.  \n A new table will be created").format(where + name))
            append = False

    if not append:
        t = hd5.createTable(where, name, data, **kwargs)

        # update header
        for k, v in header.items():
            if (k == 'FILTERS') & (float(t.attrs['VERSION']) >= 2.0):
                t.attrs[k.lower()] = v
            else:
                t.attrs[k] = v
        if 'TITLE' not in header:
            t.attrs['TITLE'] = name

        # add column descriptions and units
        for e, colname in enumerate(data.dtype.names):
            _u = units.get(colname, None)
            _d = comments.get(colname, None)
            if _u is not None:
                t.attrs['FIELD_{0:d}_UNIT'.format(e)] = _u
            if _d is not None:
                t.attrs['FIELD_{0:d}_DESC'.format(e)] = _d

        # add aliases
        for i, (k, v) in enumerate(aliases.items()):
            t.attrs['ALIAS{0:d}'.format(i)] = '{0:s}={1:s}'.format(k, v)

        t.flush()

    if not isinstance(filename, tables.File):
        hd5.flush()
        hd5.close()
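
# Round-trip sketch ('test.hd5' is a hypothetical path; this assumes a
# pytables version providing the 2.x API used above, e.g. openFile and
# createTable):
#
# >>> data = np.rec.fromarrays([[1, 2], [3., 4.]], names='a,b')
# >>> _hdf5_write_data('test.hd5', data, tablename='/data')
# >>> hdr, aliases, units, desc, out = _hdf5_read_data('test.hd5')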


def _hdf5_read_data(filename, tablename=None, silent=False, *args, **kwargs):
    """ Generate the corresponding ascii Header that contains all necessary info

    Parameters
    ----------
    filename: str
        file to read from

    tablename: str
        node containing the table

    silent: bool
        skip verbose messages

    Returns
    -------
    hdr: dict
        header dictionary

    aliases: dict
        aliases

    units: dict
        units

    desc: dict
        comments/description of keywords

    data: recarray
        table data
    """
    source = tables.openFile(filename, *args, **kwargs)

    if tablename is None:
        node = source.listNodes('/')[0]
        tablename = node.name
    else:
        if tablename[0] != '/':
            node = source.getNode('/' + tablename)
        else:
            node = source.getNode(tablename)
    if not silent:
        print("\tLoading table: {0}".format(tablename))

    hdr = {}
    aliases = {}

    # read header
    exclude = ['NROWS', 'VERSION', 'CLASS', 'EXTNAME', 'TITLE']
    for k in node.attrs._v_attrnames:
        if (k not in exclude):
            if (k[:5] != 'FIELD') & (k[:5] != 'ALIAS'):
                hdr[k] = node.attrs[k]
            elif k[:5] == 'ALIAS':
                c0, c1 = node.attrs[k].split('=')
                aliases[c0] = c1

    empty_name = ['', 'None', 'Noname', None]
    if node.attrs['TITLE'] not in empty_name:
        hdr['NAME'] = node.attrs['TITLE']
    else:
        hdr['NAME'] = '{0:s}/{1:s}'.format(filename, node.name)

    # read column meta
    units = {}
    desc = {}

    for (k, colname) in enumerate(node.colnames):
        _u = getattr(node.attrs, 'FIELD_{0:d}_UNIT'.format(k), None)
        _d = getattr(node.attrs, 'FIELD_{0:d}_DESC'.format(k), None)
        if _u is not None:
            units[colname] = _u
        if _d is not None:
            desc[colname] = _d

    data = node[:]

    source.close()

    return hdr, aliases, units, desc, data


def _ascii_generate_header(tab, comments='#', delimiter=' ',
                           commentedHeader=True):
    """ Generate the corresponding ascii Header that contains all necessary info

    Parameters
    ----------

    tab: SimpleTable instance
        table

    comments: str
        string to prepend header lines

    delimiter: str, optional
        The string used to separate values.  By default, this is any
        whitespace.

    commentedHeader: bool, optional
        if set, the last line of the header is expected to be the column titles

    Returns
    -------
    hdr: str
        string that will be written at the beginning of the file
    """
    hdr = []

    if comments is None:
        comments = ''

    # table header
    length = max(map(len, tab.header.keys()))
    fmt = '{{0:s}} {{1:{0:d}s}}\t{{2:s}}'.format(length)
    for k, v in tab.header.items():
        for vk in v.split('\n'):
            if len(vk) > 0:
                hdr.append(fmt.format(comments, k.upper(), vk.strip()))

    # column metadata
    hdr.append(comments)  # add empty line
    length = max(map(len, tab.keys()))
    fmt = '{{0:s}}{{0:s}} {{1:{0:d}s}}\t{{2:s}}\t{{3:s}}'.format(length)
    for colname in tab.keys():
        unit = tab._units.get(colname, 'None')
        desc = tab._desc.get(colname, 'None')
        hdr.append(fmt.format(comments, colname, unit, desc))

    # aliases
    if len(tab._aliases) > 0:
        hdr.append(comments)  # add empty line
        for k, v in tab._aliases.items():
            hdr.append('{0:s} alias\t{1:s}={2:s}'.format(comments, k, v))

    # column names
    hdr.append(comments)
    if commentedHeader:
        hdr.append('{0:s} {1:s}'.format(comments, delimiter.join(tab.keys())))
    else:
        hdr.append('{0:s}'.format(delimiter.join(tab.keys())))

    return '\n'.join(hdr)
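
# Output sketch for a hypothetical table with a single column 'RA' (unit
# 'deg') and header {'NAME': 'mytable'}; values are tab-separated (\t):
#
#   # NAME\tmytable
#   #
#   ## RA\tdeg\tNone
#   #
#   # RA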


def _latex_writeto(filename, tab, comments='%'):
    """ Write the data into a latex table format

    Parameters
    ----------
    filename: str
        file or unit to write into

    tab: SimpleTable instance
        table

    comments: str
        string to prepend header lines
    """
    txt = "\\begin{table}\n\\begin{center}\n"

    # add caption
    tabname = tab.header.get('NAME', None)
    if tabname not in ['', None, 'None']:
        txt += "\\caption{{{0:s}}}\n".format(tabname)

    # tabular
    txt += '\\begin{{tabular}}{{{0:s}}}\n'.format('c' * tab.ncols)
    txt += tab.pprint(delim=' & ', fields='MAG*', headerChar='', endline='\\\\\n', all=True, ret=True)
    txt += '\\end{tabular}\n'

    # end table
    txt += "\\end{center}\n"

    # add notes if any
    if len(tab._desc) > 0:
        txt += '\% notes \n\\begin{scriptsize}\n'
        for e, (k, v) in enumerate(tab._desc.items()):
            if v not in (None, 'None', 'none', ''):
                txt += '{0:d} {1:s}: {2:s} \\\\\n'.format(e, k, v)
        txt += '\\end{scriptsize}\n'
    txt += "\\end{table}\n"
    if hasattr(filename, 'write'):
        filename.write(txt)
    else:
        with open(filename, 'w') as unit:
            unit.write(txt)


def _convert_dict_to_structured_ndarray(data):
    """convert_dict_to_structured_ndarray

    Parameters
    ----------

    data: dictionary like object
        data structure which provides iteritems and itervalues

    returns
    -------
    tab: structured ndarray
        structured numpy array
    """
    newdtype = []
    try:
        for key, dk in iteritems(data):
            _dk = np.asarray(dk)
            dtype = _dk.dtype
            # unknown type is converted to text
            if dtype.type == np.object_:
                if len(data) == 0:
                    longest = 0
                else:
                    longest = len(max(_dk, key=len))
                    _dk = _dk.astype('|%iS' % longest)
            if _dk.ndim > 1:
                newdtype.append((str(key), _dk.dtype, (_dk.shape[1],)))
            else:
                newdtype.append((str(key), _dk.dtype))
        tab = np.rec.fromarrays(itervalues(data), dtype=newdtype)
    except AttributeError:  # not a dict
        # hope it's a tuple ((key, value),) pairs.
        from itertools import tee
        d1, d2 = tee(data)
        for key, dk in d1:
            _dk = np.asarray(dk)
            dtype = _dk.dtype
            # unknown type is converted to text
            if dtype.type == np.object_:
                if len(data) == 0:
                    longest = 0
                else:
                    longest = len(max(_dk, key=len))
                    _dk = _dk.astype('|%iS' % longest)
            if _dk.ndim > 1:
                newdtype.append((str(key), _dk.dtype, (_dk.shape[1],)))
            else:
                newdtype.append((str(key), _dk.dtype))
        tab = np.rec.fromarrays((dk for (_, dk) in d2), dtype=newdtype)

    return tab
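
# Example (column order follows the dictionary iteration order):
#
# >>> tab = _convert_dict_to_structured_ndarray({'a': [1, 2], 'b': [3., 4.]})
# >>> tab.dtype.names
# ('a', 'b')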


def __indent__(rows, header=None, units=None, headerChar='-',
               delim=' | ', endline='\n', **kwargs):
    """Indents a table by column.

    Parameters
    ----------
    rows: sequences of rows
        one sequence per row.

    header: sequence of str
        row consists of the columns' names

    units: sequence of str
        Sequence of units

    headerChar: char
        Character to be used for the row separator line

    delim: char
        The column delimiter.

    returns
    -------
    txt: str
        string representation of rows
    """
    length_data = list(map(max, zip(*[list(map(len, k)) for k in rows])))
    length = length_data[:]

    if (header is not None):
        length_header = list(map(len, header))
        length = list(map(max, zip(length_data, length_header)))

    if (units is not None):
        length_units = list(map(len, units))
        length = list(map(max, zip(length, length_units)))

    if headerChar not in (None, '', ' '):
        rowSeparator = headerChar * (sum(length) + len(delim) * (len(length) - 1)) + endline
    else:
        rowSeparator = ''

    # make the format
    fmt = ['{{{0:d}:{1:d}s}}'.format(k, l) for (k, l) in enumerate(length)]
    fmt = delim.join(fmt) + endline
    # write the string
    txt = rowSeparator
    if header is not None:
        txt += fmt.format(*header)  # + endline
        txt += rowSeparator
    if units is not None:
        txt += fmt.format(*units)  # + endline
        txt += rowSeparator
    for r in rows:
        txt += fmt.format(*r)  # + endline
    txt += rowSeparator
    return txt
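
# Example:
#
# >>> print(__indent__([['1', '2'], ['3', '4']], header=['a', 'b']))
# -----
# a | b
# -----
# 1 | 2
# 3 | 4
# -----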


def pprint_rec_entry(data, num=0, keys=None):
    """ print one line with key and values properly to be readable

    Parameters
    ----------
    data: recarray
        data to extract entry from

    num: int, slice
        indice selection

    keys: sequence or str
        if str, can be a regular expression
        if sequence, the sequence of keys to print
    """
    if (keys is None) or (keys == '*'):
        _keys = data.dtype.names
    elif type(keys) in basestring:
        _keys = [k for k in data.dtype.names if (re.match(keys, k) is not None)]
    else:
        _keys = keys

    length = max(map(len, _keys))
    fmt = '{{0:{0:d}s}}: {{1}}'.format(length)
    data = data[num]

    for k in _keys:
        print(fmt.format(k, data[k]))


def pprint_rec_array(data, idx=None, fields=None, ret=False, all=False,
                     headerChar='-', delim=' | ', endline='\n'):
    """ Pretty print the table content
        you can select the table parts to display using idx to
        select the rows and fields to only display some columns
        (ret is only for internal use)

    Parameters
    ----------
    data: array
        array to show

    idx: sequence, slice
        sub selection to print

    fields: str, sequence
        if str can be a regular expression, and/or list of fields separated
        by spaces or commas

    ret: bool
        if set return the string representation instead of printing the result

    all: bool
        if set, force to show all rows

    headerChar: char
        Character to be used for the row separator line

    delim: char
        The column delimiter.
    """
    if (fields is None) or (fields == '*'):
        _keys = data.dtype.names
    elif type(fields) in basestring:
        if ',' in fields:
            _fields = fields.split(',')
        elif ' ' in fields:
            _fields = fields.split()
        else:
            _fields = [fields]
        lbls = data.dtype.names
        _keys = []
        for _fk in _fields:
            _keys += [k for k in lbls if (re.match(_fk, k) is not None)]
    else:
        lbls = data.dtype.names
        _keys = []
        for _fk in fields:
            _keys += [k for k in lbls if (re.match(_fk, k) is not None)]

    nfields = len(_keys)
    nrows = len(data)
    fields = list(_keys)

    if idx is None:
        if (nrows < 10) or (all is True):
            rows = [[str(data[k][rk]) for k in _keys] for rk in range(nrows)]
        else:
            rows = [[str(data[k][rk]) for k in _keys] for rk in range(5)]
            rows += [['...' for k in range(nfields)]]
            rows += [[str(data[k][rk]) for k in fields] for rk in range(-5, 0)]
    elif isinstance(idx, slice):
        _idx = range(idx.start, idx.stop, idx.step or 1)
        rows = [[str(data[k][rk]) for k in fields] for rk in _idx]
    else:
        rows = [[str(data[k][rk]) for k in fields] for rk in idx]

    out = __indent__(rows, header=_keys, units=None, delim=delim,
                     headerChar=headerChar, endline=endline)
    if ret is True:
        return out
    else:
        print(out)


def elementwise(func):
    """
    Quick and dirty elementwise function decorator it provides a quick way
    to apply a function either on one element or a sequence of elements
    """
    @wraps(func)
    def wrapper(it, **kwargs):
        if hasattr(it, '__iter__') & (type(it) not in basestring):
            _f = partial(func, **kwargs)
            return map(_f, it)
        else:
            return func(it, **kwargs)
    return wrapper
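
# Example: the decorated function transparently maps over sequences
# (returning a map object on Python 3):
#
# >>> @elementwise
# ... def double(x):
# ...     return 2 * x
# >>> double(3)
# 6
# >>> list(double([1, 2]))
# [2, 4]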


class AstroHelpers(object):
    """ Helpers related to astronomy data """

    @staticmethod
    @elementwise
    def hms2deg(_str, delim=':'):
        """ Convert sexagesimal coordinates (hours:min:sec) into degrees

        Parameters
        ----------
        _str: string or sequence
            string to convert

        delim: str
            character delimiting the fields

        Returns
        -------
        deg: float
            angle in degrees
        """
        if _str[0] == '-':
            neg = -1
            _str = _str[1:]
        else:
            neg = 1
        _str = _str.split(delim)
        return neg * ((((float(_str[-1]) / 60. + float(_str[1])) / 60. +
                        float(_str[0])) / 24. * 360.))

    @staticmethod
    @elementwise
    def deg2dms(val, delim=':'):
        """ Convert degrees into sexagesimal coordinates (deg:arcmin:arcsec)

        Parameters
        ----------
        val: float
            angle in degrees

        delim: str
            character delimiting the fields

        Returns
        -------
        str: string or sequence
            converted string
        """
        if val < 0:
            sign = -1
        else:
            sign = 1
        d = int(sign * val)
        m = int((sign * val - d) * 60.)
        s = ((sign * val - d) * 60. - m) * 60.
        return '{0}{1}{2}{3}{4}'.format(sign * d, delim, m, delim, s)

    @staticmethod
    @elementwise
    def deg2hms(val, delim=':'):
        """ Convert degrees into sexagesimal coordinates (hours:min:sec)

        Parameters
        ----------
        val: float
            angle in degrees

        delim: str
            character delimiting the fields

        Returns
        -------
        str: string or sequence
            converted string
        """
        if val < 0:
            sign = -1
        else:
            sign = 1
        h = int(sign * val / 45. * 3.)  # * 24 / 360
        m = int((sign * val / 45. * 3. - h) * 60.)
        s = ((sign * val / 45. * 3. - h) * 60. - m) * 60.
        return '{0}{1}{2}{3}{4}'.format(sign * h, delim, m, delim, s)

    @staticmethod
    @elementwise
    def dms2deg(_str, delim=':'):
        """ Convert sexagesimal coordinates (deg:arcmin:arcsec) into degrees

        Parameters
        ----------
        _str: string or sequence
            string to convert

        delim: str
            character delimiting the fields

        Returns
        -------
        deg: float
            angle in degrees
        """
        if _str[0] == '-':
            neg = -1
            _str = _str[1:]
        else:
            neg = 1
        _str = _str.split(delim)
        return (neg * ((float(_str[-1]) / 60. + float(_str[1])) / 60. +
                       float(_str[0])))

    @staticmethod
    def euler(ai_in, bi_in, select, b1950=False, dtype='f8'):
        """
        Transform between Galactic, celestial, and ecliptic coordinates.

        Celestial coordinates (RA, Dec) should be given in equinox J2000
        unless b1950 is True.

        select   From              To         |  select  From      To
        -------------------------------------------------------------------
        1        RA-Dec (2000)     Galactic   |  4       Ecliptic  RA-Dec
        2        Galactic          RA-Dec     |  5       Ecliptic  Galactic
        3        RA-Dec            Ecliptic   |  6       Galactic  Ecliptic

        Parameters
        ----------
        ai_in: float, or sequence
            Input Longitude in DEGREES, scalar or vector.

        bi_in: float, or sequence
            Input Latitude in DEGREES

        select: int
            Integer from 1 to 6 specifying type of coordinate
            transformation.

        b1950: bool
            If True, the equinox is set to 1950.

        Returns
        -------
        long_out: float, seq
            Output Longitude in DEGREES

        lat_out: float, seq
            Output Latitude in DEGREES

        REVISION HISTORY:
        Written W. Landsman, February 1987
        Adapted from Fortran by Daryl Yentis NRL
        Converted to IDL V5.0 W. Landsman September 1997
        Made J2000 the default, added /FK4 keyword W. Landsman December 1998
        Add option to specify SELECT as a keyword W. Landsman March 2003
        Converted from IDL to numerical Python: Erin Sheldon, NYU, 2008-07-02
        """
        # Make a copy as an array. ndmin=1 to avoid messed up scalar arrays
        ai = np.array(ai_in, ndmin=1, copy=True, dtype=dtype)
        bi = np.array(bi_in, ndmin=1, copy=True, dtype=dtype)

        PI = math.pi
        # HALFPI = PI / 2.0
        D2R = PI / 180.0
        R2D = 1.0 / D2R
        twopi = 2.0 * PI
        fourpi = 4.0 * PI

        # J2000 coordinate conversions are based on the following constants
        # (see the Hipparcos explanatory supplement).
        # eps = 23.4392911111d       Obliquity of the ecliptic
        # alphaG = 192.85948d        Right Ascension of Galactic North Pole
        # deltaG = 27.12825d         Declination of Galactic North Pole
        # lomega = 32.93192d         Galactic longitude of celestial equator
        # alphaE = 180.02322d        Ecliptic longitude of Galactic North Pole
        # deltaE = 29.811438523d     Ecliptic latitude of Galactic North Pole
        # Eomega = 6.3839743d        Galactic longitude of ecliptic equator
        # Parameters for all the different conversions
        if b1950:
            # equinox = '(B1950)'
            psi = np.array([0.57595865315, 4.9261918136,
                            0.00000000000, 0.0000000000,
                            0.11129056012, 4.7005372834], dtype=dtype)
            stheta = np.array([0.88781538514, -0.88781538514,
                               0.39788119938, -0.39788119938,
                               0.86766174755, -0.86766174755], dtype=dtype)
            ctheta = np.array([0.46019978478, 0.46019978478,
                               0.91743694670, 0.91743694670,
                               0.49715499774, 0.49715499774], dtype=dtype)
            phi = np.array([4.9261918136, 0.57595865315,
                            0.0000000000, 0.00000000000,
                            4.7005372834, 0.11129056012], dtype=dtype)
        else:
            # equinox = '(J2000)'
            psi = np.array([0.57477043300, 4.9368292465,
                            0.00000000000, 0.0000000000,
                            0.11142137093, 4.71279419371], dtype=dtype)
            stheta = np.array([0.88998808748, -0.88998808748,
                               0.39777715593, -0.39777715593,
                               0.86766622025, -0.86766622025], dtype=dtype)
            ctheta = np.array([0.45598377618, 0.45598377618,
                               0.91748206207, 0.91748206207,
                               0.49714719172, 0.49714719172], dtype=dtype)
            phi = np.array([4.9368292465, 0.57477043300,
                            0.0000000000, 0.00000000000,
                            4.71279419371, 0.11142137093], dtype=dtype)

        # zero offset
        i = select - 1
        a = ai * D2R - phi[i]
        b = bi * D2R
        sb = sin(b)
        cb = cos(b)
        cbsa = cb * sin(a)
        b = -stheta[i] * cbsa + ctheta[i] * sb
        w, = np.where(b > 1.0)
        if w.size > 0:
            b[w] = 1.0
        bo = arcsin(b) * R2D
        a = arctan2(ctheta[i] * cbsa + stheta[i] * sb, cb * cos(a))
        ao = ((a + psi[i] + fourpi) % twopi) * R2D

        return ao, bo

    @staticmethod
    def sphdist(ra1, dec1, ra2, dec2):
        """ measures the spherical distance between 2 points

        Parameters
        ----------
        ra1: float or sequence
            first right ascension in degrees

        dec1: float or sequence
            first declination in degrees

        ra2: float or sequence
            second right ascension in degrees

        dec2: float or sequence
            second declination in degrees

        Returns
        -------
        Outputs: float or sequence
            returns a distance in degrees
        """
        dec1_r = deg2rad(dec1)
        dec2_r = deg2rad(dec2)
        return 2. * rad2deg(arcsin(sqrt((sin((dec1_r - dec2_r) / 2)) ** 2 +
                                        cos(dec1_r) * cos(dec2_r) *
                                        (sin((deg2rad(ra1 - ra2)) / 2)) ** 2)))

    @staticmethod
    def conesearch(ra0, dec0, ra, dec, r, outtype=0):
        """ Perform a cone search on a table

        Parameters
        ----------
        ra0: ndarray[ndim=1, dtype=float]
            column to use as RA source in degrees

        dec0: ndarray[ndim=1, dtype=float]
            column to use as DEC source in degrees

        ra: float
            ra to look for (in degrees)

        dec: float
            dec to look for (in degrees)

        r: float
            distance in degrees

        outtype: int
            type of outputs
                0 -- minimal, indices of matching coordinates
                1 -- indices and distances of matching coordinates
                2 -- full, boolean filter and distances

        Returns
        -------
        t: tuple
            if outtype is 0, only return indices from ra0, dec0
            elif outtype is 1, return indices from ra0, dec0 and distances
            elif outtype is 2, return conditional vector and distance to all
            ra0, dec0
        """
        @elementwise
        def getDist(pk):
            """ get spherical distance between 2 points """
            return AstroHelpers.sphdist(pk[0], pk[1], ra, dec)

        dist = np.array(list(getDist(zip(ra0, dec0))))
        v = (dist <= r)

        if outtype == 0:
            return np.ravel(np.where(v))
        elif outtype == 1:
            return np.ravel(np.where(v)), dist[v]
        else:
            return v, dist

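# Usage sketch for AstroHelpers (illustrative):
#
# >>> AstroHelpers.deg2hms(180.0)
# '12:0:0.0'
# >>> AstroHelpers.hms2deg('12:0:0')
# 180.0
# >>> round(AstroHelpers.sphdist(0., 0., 0., 1.), 6)
# 1.0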

# ==============================================================================
# SimpleTable -- provides table manipulations with limited storage formats
# ==============================================================================
class SimpleTable(object):
    """ Table class that is designed to be the basis of any format wrapping
    around numpy recarrays

    Attributes
    ----------
    fname: str or object
        if str, the file to read from. This may be limited to the formats
        currently handled automatically. If the format is not correctly
        handled, you can try by providing an object.

        if object with a structure like dict, ndarray, or recarray-like
            the data will be encapsulated into a Table

    caseless: bool
        if set, column names will be caseless during operations

    aliases: dict
        set of column aliases (can be defined later :func:`set_alias`)

    units: dict
        set of column units (can be defined later :func:`set_unit`)

    desc: dict
        set of column description or comments (can be defined later
        :func:`set_comment`)

    header: dict
        key, value pair corresponding to the attributes of the table
    """

    def __init__(self, fname, *args, **kwargs):
        dtype = kwargs.pop('dtype', None)
        self.caseless = kwargs.get('caseless', False)
        self._aliases = kwargs.get('aliases', {})
        self._units = kwargs.get('units', {})
        self._desc = kwargs.get('desc', {})

        if (isinstance(fname, (dict, tuple, list, types.GeneratorType))) or (dtype in [dict, 'dict']):
            try:
                self.header = fname.pop('header', {})
            except (AttributeError, TypeError):
                self.header = kwargs.pop('header', {})
            self.data = _convert_dict_to_structured_ndarray(fname)
        elif (type(fname) in (str,)) or (dtype is not None):
            if (type(fname) in (str,)):
                extension = fname.split('.')[-1]
            else:
                extension = None
            if (extension == 'csv') or (dtype == 'csv'):
                kwargs.setdefault('delimiter', ',')
                commentedHeader = kwargs.pop('commentedHeader', False)
                n, header, units, comments, aliases, names = _ascii_read_header(
                    fname, commentedHeader=commentedHeader, **kwargs)
                kwargs.setdefault('names', names)
                if _pd is not None:   # pandas is faster
                    kwargs.setdefault('comment', '#')
                    kwargs.setdefault('as_recarray', True)
                    kwargs.setdefault('skiprows', n)
                    self.data = _pd.read_csv(fname, *args, **kwargs)
                else:
                    kwargs.setdefault('skip_header', n)
                    kwargs.setdefault('comments', '#')
                    self.data = np.recfromcsv(fname, *args, **kwargs)
                self.header = header
                self._units.update(**units)
                self._desc.update(**comments)
                self._aliases.update(**aliases)
                kwargs.setdefault('names', True)
            elif (extension in ('tsv', 'dat', 'txt')) or (dtype in ('tsv', 'dat', 'txt')):
                commentedHeader = kwargs.pop('commentedHeader', True)
                n, header, units, comments, aliases, names = _ascii_read_header(
                    fname, commentedHeader=commentedHeader, **kwargs)
                kwargs.setdefault('names', names)
                if _pd is not None:   # pandas is faster
                    kwargs.setdefault('delimiter', r'\s+')
                    kwargs.setdefault('comment', '#')
                    kwargs.setdefault('as_recarray', True)
                    self.data = _pd.read_csv(fname, *args, **kwargs)
                else:
                    kwargs.setdefault('delimiter', None)
                    kwargs.setdefault('comments', '#')
                    kwargs.setdefault('skip_header', n)
                    self.data = np.recfromtxt(fname, *args, **kwargs)
                self.header = header
                self._units.update(**units)
                self._desc.update(**comments)
                self._aliases.update(**aliases)
            elif (extension == 'fits') or (dtype == 'fits'):
                if pyfits is None:
                    raise RuntimeError('Cannot read this format, Astropy or pyfits not found')
                if ('extname' not in kwargs) and ('ext' not in kwargs) and (len(args) == 0):
                    args = (1, )
                self.data = np.array(pyfits.getdata(fname, *args, **kwargs))
                header, aliases, units, comments = _fits_read_header(
                    pyfits.getheader(fname, *args, **kwargs))
                self.header = header
                self._desc.update(**comments)
                self._units.update(**units)
                self._aliases.update(**aliases)
            elif (extension in ('hdf5', 'hd5', 'hdf')) or (dtype in ('hdf5', 'hd5', 'hdf')):
                if tables is None:
                    raise RuntimeError('Cannot read this format, pytables not found')
                hdr, aliases, units, desc, data = _hdf5_read_data(fname, *args, **kwargs)
                self.data = data
                self.header = hdr
                self._units.update(**units)
                self._desc.update(**desc)
                self._aliases.update(**aliases)
            else:
                raise Exception('Format {0:s} not handled'.format(extension))
        elif type(fname) == np.ndarray:
            self.data = fname
            self.header = {}
        elif type(fname) == pyfits.FITS_rec:
            self.data = np.array(fname)
            self.header = {}
        elif type(fname) == SimpleTable:
            cp = kwargs.pop('copy', True)
            if cp:
                self.data = deepcopy(fname.data)
                self.header = deepcopy(fname.header)
                self._aliases = deepcopy(fname._aliases)
                self._units = deepcopy(fname._units)
                self._desc = deepcopy(fname._desc)
            else:
                self.data = fname.data
                self.header = fname.header
                self._aliases = fname._aliases
                self._units = fname._units
                self._desc = fname._desc
        elif hasattr(fname, 'dtype'):
            self.data = np.array(fname)
            self.header = {}
        else:
            raise Exception('Type {0!s:s} not handled'.format(type(fname)))

        if 'NAME' not in self.header:
            if type(fname) not in basestring:
                self.header['NAME'] = 'No Name'
            else:
                self.header['NAME'] = fname

    def pprint_entry(self, num, keys=None):
        """ print one line with key and values properly to be readable

        Parameters
        ----------
        num: int, slice
            indice selection

        keys: sequence or str
            if str, can be a regular expression
            if sequence, the sequence of keys to print
        """
        if (keys is None) or (keys == '*'):
            _keys = self.keys()
        elif type(keys) in basestring:
            _keys = [k for k in (self.keys() + tuple(self._aliases.keys()))
                     if (re.match(keys, k) is not None)]
        else:
            _keys = keys

        length = max(map(len, _keys))
        fmt = '{{0:{0:d}s}}: {{1}}'.format(length)
        data = self[num]

        for k in _keys:
            print(fmt.format(k, data[self.resolve_alias(k)]))

    def pprint(self, idx=None, fields=None, ret=False, all=False,
               full_match=False, headerChar='-', delim=' | ', endline='\n',
               **kwargs):
        """ Pretty print the table content
            you can select the table parts to display using idx to
            select the rows and fields to only display some columns
            (ret is only for internal use)

        Parameters
        ----------
        idx: sequence, slice
            sub selection to print

        fields: str, sequence
            if str can be a regular expression, and/or list of fields separated
            by spaces or commas

        ret: bool
            if set return the string representation instead of printing the result

        all: bool
            if set, force to show all rows

        full_match: bool
            if set, use :func:`re.fullmatch` instead of :func:`re.match`

        headerChar: char
            Character to be used for the row separator line

        delim: char
            The column delimiter.
        """
        if full_match is True:
            fn = re.fullmatch
        else:
            fn = re.match

        if (fields is None) or (fields == '*'):
            _keys = self.keys()
        elif type(fields) in basestring:
            if ',' in fields:
                _fields = fields.split(',')
            elif ' ' in fields:
                _fields = fields.split()
            else:
                _fields = [fields]
            lbls = self.keys() + tuple(self._aliases.keys())
            _keys = []
            for _fk in _fields:
                _keys += [k for k in lbls if (fn(_fk, k) is not None)]
        else:
            lbls = self.keys() + tuple(self._aliases.keys())
            _keys = []
            for _fk in fields:
                _keys += [k for k in lbls if (fn(_fk, k) is not None)]

        nfields = len(_keys)

        fields = list(map(self.resolve_alias, _keys))

        if idx is None:
            if (self.nrows < 10) or all:
                rows = [[str(self[k][rk]) for k in _keys] for rk in range(self.nrows)]
            else:
                rows = [[str(self[k][rk]) for k in _keys] for rk in range(5)]
                rows += [['...' for k in range(nfields)]]
                rows += [[str(self[k][rk]) for k in fields] for rk in range(-5, 0)]
        elif isinstance(idx, slice):
            _idx = range(idx.start, idx.stop, idx.step or 1)
            rows = [[str(self[k][rk]) for k in fields] for rk in _idx]
        else:
            rows = [[str(self[k][rk]) for k in fields] for rk in idx]

        if len(self._units) == 0:
            units = None
        else:
            units = ['(' + str(self._units.get(k, None) or '') + ')' for k in fields]

        out = __indent__(rows, header=_keys, units=units, delim=delim,
                         headerChar=headerChar, endline=endline)
        if ret is True:
            return out
        else:
            print(out)

    def write(self, fname, **kwargs):
        """ write table into file

        Parameters
        ----------
        fname: str
            filename to export the table into

        .. note::
            additional keywords are forwarded to the corresponding libraries
            :func:`pyfits.writeto` or :func:`pyfits.append`
            :func:`np.savetxt`
        """
        extension = kwargs.pop('extension', None)
        if extension is None:
            extension = fname.split('.')[-1]
        if (extension == 'csv'):
            comments = kwargs.pop('comments', '#')
            delimiter = kwargs.pop('delimiter', ',')
            commentedHeader = kwargs.pop('commentedHeader', False)
            hdr = _ascii_generate_header(self, comments=comments,
                                         delimiter=delimiter,
                                         commentedHeader=commentedHeader)
            header = kwargs.pop('header', hdr)
            np.savetxt(fname, self.data, delimiter=delimiter, header=header,
                       comments='', **kwargs)
        elif (extension in ['txt', 'dat']):
            comments = kwargs.pop('comments', '#')
            delimiter = kwargs.pop('delimiter', ' ')
            commentedHeader = kwargs.pop('commentedHeader', True)
            hdr = _ascii_generate_header(self, comments=comments,
                                         delimiter=delimiter,
                                         commentedHeader=commentedHeader)
            header = kwargs.pop('header', hdr)
            np.savetxt(fname, self.data, delimiter=delimiter, header=header,
                       comments='', **kwargs)
        elif (extension == 'fits'):
            hdr0 = kwargs.pop('header', None)
            append = kwargs.pop('append', False)
            hdr = _fits_generate_header(self)
            if hdr0 is not None:
                hdr.update(**hdr0)
            if append:
                _fits_append(fname, self.data, hdr, **kwargs)
            else:
                # patched version to correctly include the header
                _fits_writeto(fname, self.data, hdr, **kwargs)
        elif (extension in ('hdf', 'hdf5', 'hd5')):
            _hdf5_write_data(fname, self.data, header=self.header,
                             units=self._units, comments=self._desc,
                             aliases=self._aliases, **kwargs)
        else:
            raise Exception('Format {0:s} not handled'.format(extension))

    def set_alias(self, alias, colname):
        """ Define an alias to a column

        Parameters
        ----------
        alias: str
            The new alias of the column

        colname: str
            The column being aliased
        """
        if (colname not in self.keys()):
            raise KeyError("Column {0:s} does not exist".format(colname))
        self._aliases[alias] = colname

    def reverse_alias(self, colname):
        """ Return aliases of a given column.

        Given a colname, return a sequence of aliases associated to this
        column. Aliases are defined by using :func:`set_alias`
        """
        _colname = self.resolve_alias(colname)
        if (_colname not in self.keys()):
            raise KeyError("Column {0:s} does not exist".format(colname))

        return tuple([k for (k, v) in self._aliases.items() if (v == _colname)])

    def resolve_alias(self, colname):
        """ Return the name of an aliased column.

        Given an alias, return the column name it aliases. This
        function is a no-op if the alias is a column name itself.

        Aliases are defined by using :func:`set_alias`
        """
        # User aliases
        if hasattr(colname, '__iter__') & (type(colname) not in basestring):
            return [self.resolve_alias(k) for k in colname]
        else:
            if self.caseless is True:
                maps = dict([(k.lower(), v) for k, v in self._aliases.items()])
                maps.update((k.lower(), k) for k in self.keys())
                return maps.get(colname.lower(), colname)
            else:
                return self._aliases.get(colname, colname)

    def set_unit(self, colname, unit):
        """ Set the unit of a column referenced by its name

        Parameters
        ----------
        colname: str
            column name or registered alias

        unit: str
            unit description
        """
        if isinstance(unit, basestring) and isinstance(colname, basestring):
            self._units[self.resolve_alias(colname)] = str(unit)
        else:
            for k, v in zip(colname, unit):
                self._units[self.resolve_alias(k)] = str(v)

    def set_comment(self, colname, comment):
        """ Set the comment of a column referenced by its name

        Parameters
        ----------
        colname: str
            column name or registered alias

        comment: str
            column description
        """
        if isinstance(comment, basestring) and isinstance(colname, basestring):
            self._desc[self.resolve_alias(colname)] = str(comment)
        else:
            for k, v in zip(colname, comment):
                self._desc[self.resolve_alias(k)] = str(v)

    def keys(self, regexp=None, full_match=False):
        """ Return the data column names or a subset of it

        Parameters
        ----------
        regexp: str
            pattern to filter the keys with

        full_match: bool
            if set, use :func:`re.fullmatch` instead of :func:`re.match`
            Try to apply the pattern at the start of the string, returning
            a match object, or None if no match was found.

        returns
        -------
        seq: sequence
            sequence of keys
        """
        if (regexp is None) or (regexp == '*'):
            return self.colnames
        elif type(regexp) in basestring:
            if full_match is True:
                fn = re.fullmatch
            else:
                fn = re.match

            if regexp.count(',') > 0:
                _re = regexp.split(',')
            elif regexp.count(' ') > 0:
                _re = regexp.split()
            else:
                _re = [regexp]

            lbls = self.colnames + tuple(self._aliases.keys())
            _keys = []
            for _rk in _re:
                _keys += [k for k in lbls if (fn(_rk, k) is not None)]

            return _keys
        elif hasattr(regexp, '__iter__'):
            _keys = []
            for k in regexp:
                _keys += self.keys(k)
            return _keys
        else:
            raise ValueError('Unexpected type {0} for regexp'.format(type(regexp)))

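    # Filtering sketch (column names here are hypothetical):
    #
    # >>> t.keys('RA|DEC')        # regular expression on column names
    # >>> t.keys('logT, logL')    # comma-separated list of patterns
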
    @property
    def name(self):
        """ name of the table given by the Header['NAME'] attribute """
        return self.header.get('NAME', None)

    @property
    def colnames(self):
        """ Sequence of column names """
        return self.data.dtype.names

    @property
    def ncols(self):
        """ number of columns """
        return len(self.colnames)

    @property
    def nrows(self):
        """ number of lines """
        return len(self.data)

    @property
    def nbytes(self):
        """ number of bytes of the object """
        n = sum(k.nbytes if hasattr(k, 'nbytes') else sys.getsizeof(k)
                for k in self.__dict__.values())
        return n

    def __len__(self):
        """ number of lines """
        return self.nrows

    @property
    def shape(self):
        """ shape of the data """
        return self.data.shape

    @property
    def dtype(self):
        """ dtype of the data """
        return self.data.dtype

    def __getitem__(self, v):
        return np.asarray(self.data.__getitem__(self.resolve_alias(v)))

    def get(self, v, full_match=False):
        """ returns a table from columns given as v

        this function is equivalent to :func:`__getitem__` but preserves the
        Table format and associated properties (units, description, header)

        Parameters
        ----------
        v: str
            pattern to filter the keys with

        full_match: bool
            if set, use :func:`re.fullmatch` instead of :func:`re.match`
        """
        new_keys = self.keys(v)
        t = self.__class__(self[new_keys])
        t.header.update(**self.header)
        t._aliases.update((k, v) for (k, v) in self._aliases.items() if v in new_keys)
        t._units.update((k, v) for (k, v) in self._units.items() if v in new_keys)
        t._desc.update((k, v) for (k, v) in self._desc.items() if v in new_keys)
        return t

    def __setitem__(self, k, v):
        if k in self:
            return self.data.__setitem__(self.resolve_alias(k), v)
        else:
            object.__setitem__(self, k, v)

    def __getattr__(self, k):
        try:
            return self.data.__getitem__(self.resolve_alias(k))
        except:
            return object.__getattribute__(self, k)

    def __iter__(self):
        newtab = self.select('*', [0])
        for d in self.data:
            newtab.data[0] = d
            yield newtab
        # return self.data.__iter__()

    def iterkeys(self):
        """ Iterator over the columns of the table """
        for k in self.colnames:
            yield k

    def itervalues(self):
        """ Iterator over the lines of the table """
        for l in self.data:
            yield l

    def info(self):
        s = "\nTable: {name:s}\n nrows={s.nrows:d}, ncols={s.ncols:d}, mem={size:s}"
        s = s.format(name=self.header.get('NAME', 'Noname'), s=self,
                     size=pretty_size_print(self.nbytes))

        s += '\n\nHeader:\n'
        vals = list(self.header.items())
        length = max(map(len, self.header.keys()))
        fmt = '\t{{0:{0:d}s}} {{1}}\n'.format(length)
        for k, v in vals:
            s += fmt.format(k, v)

        vals = [(k, self._units.get(k, ''), self._desc.get(k, ''))
                for k in self.colnames]
        lengths = [(len(k), len(self._units.get(k, '')), len(self._desc.get(k, '')))
                   for k in self.colnames]
        lengths = list(map(max, (zip(*lengths))))

        s += '\nColumns:\n'

        fmt = '\t{{0:{0:d}s}} {{1:{1:d}s}} {{2:{2:d}s}}\n'.format(*(k + 1 for k in lengths))
        for k, u, c in vals:
            s += fmt.format(k, u, c)

        print(s)

        if len(self._aliases) > 0:
            print("\nTable contains alias(es):")
            for k, v in self._aliases.items():
                print('\t{0:s} --> {1:s}'.format(k, v))

    def __repr__(self):
        s = object.__repr__(self)
        s += "\nTable: {name:s}\n nrows={s.nrows:d}, ncols={s.ncols:d}, mem={size:s}"
        return s.format(name=self.header.get('NAME', 'Noname'), s=self,
                        size=pretty_size_print(self.nbytes))

    def __getslice__(self, i, j):
        return self.data.__getslice__(i, j)

    def __contains__(self, k):
        return (k in self.colnames) or (k in self._aliases)

    def __array__(self):
        return self.data

    def __call__(self, *args, **kwargs):
        if (len(args) > 0) or (len(kwargs) > 0):
            return self.evalexpr(*args, **kwargs)
        else:
            return self.info()

    def sort(self, keys, copy=False):
        """ Sort the table inplace according to one or more keys.

        This operates on the existing table (and does not return a new table).

        Parameters
        ----------
        keys: str or seq(str)
            The key(s) to order by

        copy: bool
            if set returns a sorted copy instead of working inplace
        """
        if not hasattr(keys, '__iter__'):
            keys = [keys]

        if copy is False:
            self.data.sort(order=keys)
        else:
            t = self.__class__(self, copy=True)
            t.sort(keys, copy=False)
            return t

    def match(self, r2, key):
        """ Returns the indices at which the tables match

        matching uses 2 columns that are compared in values

        Parameters
        ----------
        r2: Table
            second table to use

        key: str
            fields used for comparison.

        Returns
        -------
        indexes: tuple
            tuple of both indices list where the two columns match.
        """
        return np.where(np.equal.outer(self[key], r2[key]))

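    # Usage sketch (hypothetical tables t1, t2 sharing an 'ID' column):
    #
    # >>> ind1, ind2 = t1.match(t2, 'ID')
    # >>> rows1, rows2 = t1.data[ind1], t2.data[ind2]
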
    '''
    def stack(self, r, defaults=None):
        """ Superposes arrays fields by fields inplace

        Parameters
        ----------
        r: Table
        """
        if not hasattr(r, 'data'):
            raise AttributeError('r should be a Table object')
        self.data = recfunctions.stack_arrays([self.data, r.data], defaults,
                                              usemask=False, asrecarray=True)
    '''

    def stack(self, r, *args, **kwargs):
        """ Superposes arrays fields by fields inplace

        t.stack(t1, t2, t3, defaults=None, inplace=True)

        Parameters
        ----------
        r: Table
        """
        if not hasattr(r, 'data'):
            raise AttributeError('r should be a Table object')

        defaults = kwargs.get('defaults', None)
        inplace = kwargs.get('inplace', False)

        data = [self.data, r.data] + [k.data for k in args]
        sdata = recfunctions.stack_arrays(data, defaults, usemask=False,
                                          asrecarray=True)

        if inplace:
            self.data = sdata
        else:
            t = self.__class__(self)
            t.data = sdata
            return t

    def join_by(self, r2, key, jointype='inner', r1postfix='1', r2postfix='2',
                defaults=None, asrecarray=False, asTable=True):
        """ Join arrays `r1` and `r2` on key `key`.

        The key should be either a string or a sequence of strings
        corresponding to the fields used to join the array.  An exception is
        raised if the `key` field cannot be found in the two input arrays.
        Neither `r1` nor `r2` should have any duplicates along `key`: the
        presence of duplicates will make the output quite unreliable. Note
        that duplicates are not looked for by the algorithm.

        Parameters
        ----------
        key: str or seq(str)
            corresponding to the fields used for comparison.

        r2: Table
            Table to join with

        jointype: str in {'inner', 'outer', 'leftouter'}
            * 'inner': returns the elements common to both r1 and r2.
            * 'outer': returns the common elements as well as the elements
              of r1 not in r2 and the elements of r2 not in r1.
            * 'leftouter': returns the common elements and the elements of
              r1 not in r2.

        r1postfix: str
            String appended to the names of the fields of r1 that are present
            in r2

        r2postfix: str
            String appended to the names of the fields of r2 that are present
            in r1

        defaults: dict
            Dictionary mapping field names to the corresponding default values.

        Returns
        -------
        tab: Table
            joined table

        .. note::

            * The output is sorted along the key.

            * A temporary array is formed by dropping the fields not in the
              key for the two arrays and concatenating the result. This array
              is then sorted, and the common entries selected. The output is
              constructed by filling the fields with the selected entries.
              Matching is not preserved if there are some duplicates...
        """
        arr = recfunctions.join_by(key, self.data, r2.data, jointype=jointype,
                                   r1postfix=r1postfix, r2postfix=r2postfix,
                                   defaults=defaults, usemask=False,
                                   asrecarray=True)

        return SimpleTable(arr)

    @property
    def empty_row(self):
        """ Return an empty row array respecting the table format """
        return np.rec.recarray(shape=(1,), dtype=self.data.dtype)
    def add_column(self, name, data, dtype=None, unit=None, description=None):
        """
        Add one or multiple columns to the table

        Parameters
        ----------
        name: str or sequence(str)
            The name(s) of the column(s) to add

        data: ndarray, or sequence of ndarray
            The column data, or sequence of columns

        dtype: dtype
            numpy dtype for the data to add

        unit: str
            The unit of the values in the column

        description: str
            A description of the content of the column
        """
        _data = np.array(data, dtype=dtype)
        dtype = _data.dtype

        # unknown type is converted to text
        if dtype.type == np.object_:
            if len(data) == 0:
                longest = 0
            else:
                longest = len(max(data, key=len))
            _data = np.asarray(data, dtype='|%iS' % longest)
            dtype = _data.dtype

        if len(self.data.dtype) > 0:
            # existing data in the table
            self.data = recfunctions.append_fields(self.data, name, _data,
                                                   dtypes=dtype, usemask=False,
                                                   asrecarray=True)
        else:
            if _data.ndim > 1:
                newdtype = (str(name), _data.dtype, (_data.shape[1],))
            else:
                newdtype = (str(name), _data.dtype)
            self.data = np.array(_data, dtype=[newdtype])

        if unit is not None:
            self.set_unit(name, unit)

        if description is not None:
            self.set_comment(name, description)
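    # Usage sketch (hedged: assumes an existing ``mass`` column):
    #
    #     >>> import numpy as np
    #     >>> t.add_column('logM', np.log10(t['mass']),
    #     ...              unit='dex', description='log10 of the mass column')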
    def append_row(self, iterable):
        """
        Append one row in this table.

        see also: :func:`stack`

        Parameters
        ----------
        iterable: iterable
            line to add
        """
        if (len(iterable) != self.ncols):
            raise AttributeError('Expecting as many items as columns')
        r = self.empty_row
        for k, v in enumerate(iterable):
            r[0][k] = v
        # append the new row in place (r is a plain record array, not a Table)
        self.data = recfunctions.stack_arrays([self.data, r], usemask=False,
                                              asrecarray=True)
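    # Usage sketch (hedged: one value per column, in column order; the values
    # are placeholders):
    #
    #     >>> t.append_row((4500.0, 1.2, 'giant'))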
    def remove_columns(self, names):
        """ Remove several columns from the table

        Parameters
        ----------
        names: sequence
            A list containing the names of the columns to remove
        """
        self.pop_columns(names)
    def pop_columns(self, names):
        """ Pop several columns from the table

        Parameters
        ----------
        names: sequence
            A list containing the names of the columns to remove

        Returns
        -------
        values: tuple
            list of columns
        """
        if not hasattr(names, '__iter__') or isinstance(names, basestring):
            names = [names]

        p = [self[k] for k in names]

        _names = set([self.resolve_alias(k) for k in names])
        self.data = recfunctions.drop_fields(self.data, _names)
        for k in names:
            self._aliases.pop(k, None)
            self._units.pop(k, None)
            self._desc.pop(k, None)

        return p
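    # Usage sketch (hedged: ``U``, ``B`` and ``V`` are placeholder columns):
    #
    #     >>> u, b = t.pop_columns(['U', 'B'])   # columns returned and removed
    #     >>> t.remove_columns(['V'])            # removal only, nothing returned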
    def find_duplicate(self, index_only=False, values_only=False):
        """ Find duplication in the table entries, return a list of duplicated
        elements.

        Only works at this time if two lines are *the same entry*, not if two
        lines have *the same values*.
        """
        dup = []
        idd = []
        for i in range(len(self.data)):
            if (self.data[i] in self.data[i + 1:]):
                if (self.data[i] not in dup):
                    dup.append(self.data[i])
                    idd.append(i)
        if index_only:
            return idd
        elif values_only:
            return dup
        else:
            return zip(idd, dup)
    def evalexpr(self, expr, exprvars=None, dtype=float):
        """ evaluate expression based on the data and external variables
            all numpy functions can be used (log, exp, pi...)

        Parameters
        ----------
        expr: str
            expression to evaluate on the table
            includes mathematical operations and attribute names

        exprvars: dictionary, optional
            A dictionary that replaces the local operands in current frame.

        dtype: dtype definition
            dtype of the output array

        Returns
        -------
        out : NumPy array
            array of the result
        """
        _globals = {}
        for k in (list(self.colnames) + list(self._aliases.keys())):
            if k in expr:
                _globals[k] = self[k]

        if exprvars is not None:
            if not (hasattr(exprvars, 'keys') and hasattr(exprvars, '__getitem__')):
                raise AttributeError("Expecting a dictionary-like as exprvars")
            for k, v in exprvars.items():
                _globals[k] = v

        # evaluate expression, to obtain the final filter
        r = np.empty(self.nrows, dtype=dtype)
        r[:] = eval(expr, _globals, np.__dict__)

        return r
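    # Usage sketch (hedged: ``U``, ``B`` and ``flux`` are placeholder columns,
    # ``zp`` an external operand):
    #
    #     >>> color = t.evalexpr('U - B')
    #     >>> jy = t.evalexpr('flux * zp', exprvars={'zp': 3631.0})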
    def where(self, condition, condvars=None, *args, **kwargs):
        """ Read table data fulfilling the given `condition`.
        Only the rows fulfilling the `condition` are included in the result.

        Parameters
        ----------
        condition: str
            expression to evaluate on the table
            includes mathematical operations and attribute names

        condvars: dictionary, optional
            A dictionary that replaces the local operands in current frame.

        Returns
        -------
        out: ndarray / tuple of ndarrays
            result equivalent to :func:`np.where`
        """
        ind = np.where(self.evalexpr(condition, condvars, dtype=bool),
                       *args, **kwargs)
        return ind
    def select(self, fields, indices=None, **kwargs):
        """ Select only a few fields in the table

        Parameters
        ----------
        fields: str or sequence
            fields to keep in the resulting table

        indices: sequence or slice
            extract only on these indices

        Returns
        -------
        tab: SimpleTable instance
            resulting table
        """
        _fields = self.keys(fields)

        if fields == '*':
            if indices is None:
                return self
            else:
                tab = self.__class__(self[indices])
                for k in self.__dict__.keys():
                    if k not in ('data', ):
                        setattr(tab, k, deepcopy(self.__dict__[k]))
                return tab
        else:
            d = {}
            for k in _fields:
                _k = self.resolve_alias(k)
                if indices is not None:
                    d[k] = self[_k][indices]
                else:
                    d[k] = self[_k]
            d['header'] = deepcopy(self.header)
            tab = self.__class__(d)
            for k in self.__dict__.keys():
                if k not in ('data', ):
                    setattr(tab, k, deepcopy(self.__dict__[k]))
            return tab
    def selectWhere(self, fields, condition, condvars=None, **kwargs):
        """ Read table data fulfilling the given `condition`.
        Only the rows fulfilling the `condition` are included in the result.

        Parameters
        ----------
        fields: str or sequence
            fields to keep in the resulting table

        condition: str
            expression to evaluate on the table
            includes mathematical operations and attribute names

        condvars: dictionary, optional
            A dictionary that replaces the local operands in current frame.

        Returns
        -------
        tab: SimpleTable instance
            resulting table
        """
        if condition in [True, 'True', None]:
            ind = None
        else:
            ind = self.where(condition, condvars, **kwargs)
        tab = self.select(fields, indices=ind)
        return tab
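    # Usage sketch (hedged: ``ra``, ``dec`` and ``V`` are placeholder
    # columns):
    #
    #     >>> bright = t.selectWhere('*', 'V < 12')
    #     >>> sub = t.selectWhere('ra dec V', '(V < 12) & (ra > 10)')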
    def groupby(self, key):
        """ Create an iterator which returns (key, sub-table) grouped by each
        value of key

        Parameters
        ----------
        key: str
            expression or pattern to filter the keys with

        Returns
        -------
        key: str or sequence
            group key

        tab: SimpleTable instance
            sub-table of the group
            header, aliases and column metadata are preserved (linked to the
            master table).
        """
        _key = self.keys(key)
        getter = operator.itemgetter(*_key)

        for k, grp in itertools.groupby(self.data, getter):
            t = self.__class__(np.dstack(grp))
            t.header = self.header
            t._aliases = self._aliases
            t._units = self._units
            t._desc = self._desc
            yield (k, t)
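    # Usage sketch (hedged: ``field`` is a placeholder grouping column):
    #
    #     >>> for value, grp in t.groupby('field'):
    #     ...     print(value, grp.nrows)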
    def stats(self, fn=None, fields=None, fill=None):
        """ Make statistics on columns of a table

        Parameters
        ----------
        fn: callable or sequence of callables
            functions to apply to each column
            default: (stats.mean, stats.std, stats.min, stats.max,
            stats.has_nan)

        fields: str or sequence
            any key or key expression to subselect columns
            default is all columns

        fill: value
            value when not applicable
            default np.nan

        Returns
        -------
        tab: Table instance
            collection of statistics, one column per function in fn and one
            line per column in the table
        """
        from collections import OrderedDict

        if fn is None:
            fn = (stats.mean, stats.std, stats.min, stats.max, stats.has_nan)

        d = OrderedDict()
        d.setdefault('FIELD', [])
        for k in fn:
            d.setdefault(k.__name__, [])

        if fields is None:
            fields = self.colnames
        else:
            fields = self.keys(fields)

        if fill is None:
            fill = np.nan

        for k in fields:
            d['FIELD'].append(k)
            for fnk in fn:
                try:
                    val = fnk(self[k])
                except Exception:
                    val = fill
                d[fnk.__name__].append(val)

        return self.__class__(d, dtype=dict)
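    # Usage sketch (hedged: ``U B V`` is a placeholder field selection; the
    # ``stats`` helper class is defined later in this module):
    #
    #     >>> summary = t.stats()     # NaN-aware mean/std/min/max per column
    #     >>> t.stats(fn=(stats.p16, stats.p50, stats.p84), fields='U B V')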
    # method aliases
    remove_column = remove_columns

    # deprecated methods
    addCol = add_column
    addLine = append_row
    setComment = set_comment
    setUnit = set_unit
    delCol = remove_columns
class AstroTable(SimpleTable):
    """
    Derived from the Table, this class adds implementations of common
    astronomical tools, especially conesearch.
    """
    def __init__(self, *args, **kwargs):
        super(AstroTable, self).__init__(*args, **kwargs)
        self._ra_name, self._dec_name = self.__autoRADEC__()
        if (len(args) > 0):
            if isinstance(args[0], AstroTable):
                self._ra_name = args[0]._ra_name
                self._dec_name = args[0]._dec_name
        self._ra_name = kwargs.get('ra_name', self._ra_name)
        self._dec_name = kwargs.get('dec_name', self._dec_name)

    def __autoRADEC__(self):
        """ Tries to identify the columns containing RA and DEC coordinates """
        if 'ra' in self:
            ra_name = 'ra'
        elif 'RA' in self:
            ra_name = 'RA'
        else:
            ra_name = None
        if 'dec' in self:
            dec_name = 'dec'
        elif 'DEC' in self:
            dec_name = 'DEC'
        else:
            dec_name = None
        return ra_name, dec_name
    def set_RA(self, val):
        """ Set the column that defines RA coordinates """
        assert val in self, 'column name {} not found in the table'.format(val)
        self._ra_name = val
    def set_DEC(self, val):
        """ Set the column that defines DEC coordinates """
        assert val in self, 'column name {} not found in the table'.format(val)
        self._dec_name = val
    def get_RA(self, degree=True):
        """ Returns RA, converted from hexa/sexa into degrees """
        if self._ra_name is None:
            return None
        if (not degree) or (self.dtype[self._ra_name].kind != 'S'):
            return self[self._ra_name]
        else:
            if (len(str(self[0][self._ra_name]).split(':')) == 3):
                return np.asarray(AstroHelpers.hms2deg(self[self._ra_name],
                                                       delim=':'))
            elif (len(str(self[0][self._ra_name]).split(' ')) == 3):
                return np.asarray(AstroHelpers.hms2deg(self[self._ra_name],
                                                       delim=' '))
            else:
                raise Exception('RA format not understood')
    def get_DEC(self, degree=True):
        """ Returns DEC, converted from hexa/sexa into degrees """
        if self._dec_name is None:
            return None
        if (not degree) or (self.dtype[self._dec_name].kind != 'S'):
            return self[self._dec_name]
        else:
            if (len(str(self[0][self._dec_name]).split(':')) == 3):
                return np.asarray(AstroHelpers.dms2deg(self[self._dec_name],
                                                       delim=':'))
            elif (len(str(self[0][self._dec_name]).split(' ')) == 3):
                return np.asarray(AstroHelpers.dms2deg(self[self._dec_name],
                                                       delim=' '))
            else:
                raise Exception('DEC format not understood')
    def info(self):
        s = "\nTable: {name:s}\n nrows={s.nrows:d}, ncols={s.ncols:d}, mem={size:s}"
        s = s.format(name=self.header.get('NAME', 'Noname'), s=self,
                     size=pretty_size_print(self.nbytes))

        s += '\n\nHeader:\n'
        vals = list(self.header.items())
        length = max(map(len, self.header.keys()))
        fmt = '\t{{0:{0:d}s}} {{1}}\n'.format(length)
        for k, v in vals:
            s += fmt.format(k, v)

        vals = [(k, self._units.get(k, ''), self._desc.get(k, ''))
                for k in self.colnames]
        lengths = [(len(k), len(self._units.get(k, '')),
                    len(self._desc.get(k, ''))) for k in self.colnames]
        lengths = list(map(max, (zip(*lengths))))

        if (self._ra_name is not None) & (self._dec_name is not None):
            s += "\nPosition coordinate columns: {0}, {1}\n".format(
                self._ra_name, self._dec_name)

        s += '\nColumns:\n'

        fmt = '\t{{0:{0:d}s}} {{1:{1:d}s}} {{2:{2:d}s}}\n'.format(
            *(k + 1 for k in lengths))
        for k, u, c in vals:
            s += fmt.format(k, u, c)

        print(s)

        if len(self._aliases) > 0:
            print("\nTable contains alias(es):")
            for k, v in self._aliases.items():
                print('\t{0:s} --> {1:s}'.format(k, v))
    def coneSearch(self, ra, dec, r, outtype=0):
        """ Perform a cone search on a table

        The source coordinates (ra0, dec0) are taken from the table's RA and
        DEC columns.

        Parameters
        ----------
        ra: float
            ra to look for (in degrees)

        dec: float
            dec to look for (in degrees)

        r: float
            distance in degrees

        outtype: int
            type of outputs
            0 -- minimal, indices of matching coordinates
            1 -- indices and distances of matching coordinates
            2 -- full, boolean filter and distances

        Returns
        -------
        t: tuple
            if outtype is 0, only return indices from ra0, dec0
            elif outtype is 1, return indices from ra0, dec0 and distances
            elif outtype is 2, return conditional vector and distance to all
            ra0, dec0
        """
        if (self._ra_name is None) or (self._dec_name is None):
            raise AttributeError('Coordinate columns not set.')

        ra0 = self.get_RA()
        dec0 = self.get_DEC()
        return AstroHelpers.conesearch(ra0, dec0, ra, dec, r, outtype=outtype)
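    # Usage sketch (hedged: the coordinates are placeholders; ``at`` is an
    # AstroTable whose RA/DEC columns were auto-detected or set):
    #
    #     >>> idx, dist = at.coneSearch(280.46, -32.89, 0.5, outtype=1)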
    def zoneSearch(self, ramin, ramax, decmin, decmax, outtype=0):
        """ Perform a zone search on a table, i.e., a rectangular selection

        Parameters
        ----------
        ramin: float
            minimal value of RA

        ramax: float
            maximal value of RA

        decmin: float
            minimal value of DEC

        decmax: float
            maximal value of DEC

        outtype: int
            type of outputs
            0 or 1 -- minimal, indices of matching coordinates
            2 -- full, boolean filter over the whole table

        Returns
        -------
        r: sequence
            indices or conditional sequence of matching values
        """
        assert (self._ra_name is not None) and (self._dec_name is not None), \
            'Coordinate columns not set.'

        ra0 = self.get_RA()
        dec0 = self.get_DEC()
        ind = ((ra0 >= ramin) & (ra0 <= ramax) &
               (dec0 >= decmin) & (dec0 <= decmax))
        if outtype == 2:
            # full output: boolean filter
            return ind
        else:
            # minimal output: indices of matching rows
            return np.where(ind)
    def where(self, condition=None, condvars=None, cone=None, zone=None,
              **kwargs):
        """ Read table data fulfilling the given `condition`.
        Only the rows fulfilling the `condition` are included in the result.

        Parameters
        ----------
        condition: str
            expression to evaluate on the table
            includes mathematical operations and attribute names

        condvars: dictionary, optional
            A dictionary that replaces the local operands in current frame.

        cone: tuple, optional
            cone selection as a triplet (ra, dec, r), in degrees

        zone: tuple, optional
            zone selection as (ramin, ramax, decmin, decmax), in degrees

        Returns
        -------
        out: ndarray / tuple of ndarrays
            result equivalent to :func:`np.where`

        blobs: ndarray or None
            angular separations (in degrees) of the selected rows when a cone
            is given
        """
        if (cone is not None) and (len(cone) != 3):
            raise ValueError('Expecting cone keywords as a triplet (ra, dec, r)')
        if (zone is not None) and (len(zone) != 4):
            raise ValueError('Expecting zone keywords as a tuple of 4 elements '
                             '(ramin, ramax, decmin, decmax)')

        # combine all selections as boolean masks over the full table
        mask = np.ones(self.nrows, dtype=bool)
        if condition is not None:
            mask &= self.evalexpr(condition, condvars, dtype=bool)

        d = None
        if cone is not None:
            ra, dec, r = cone
            _mask, d = self.coneSearch(ra, dec, r, outtype=2)
            mask &= np.asarray(_mask, dtype=bool)
        if zone is not None:
            mask &= self.zoneSearch(zone[0], zone[1], zone[2], zone[3],
                                    outtype=2)

        ind = np.where(mask)
        if d is not None:
            d = d[mask]
        return ind, d
    def selectWhere(self, fields, condition=None, condvars=None, cone=None,
                    zone=None, **kwargs):
        """ Read table data fulfilling the given `condition`.
        Only the rows fulfilling the `condition` are included in the result.

        conesearch is also possible through the keyword cone formatted as
        (ra, dec, r)

        zonesearch is also possible through the keyword zone formatted as
        (ramin, ramax, decmin, decmax)

        Combination of multiple selections is also available.
        """
        ind, blobs = self.where(condition, condvars, cone, zone, **kwargs)
        tab = self.select(fields, indices=ind)

        if cone is not None:
            tab.add_column('separation', np.asarray(blobs), unit='degree')

        if self._ra_name in tab:
            tab.set_RA(self._ra_name)

        if self._dec_name in tab:
            tab.set_DEC(self._dec_name)

        return tab
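    # Usage sketch (hedged: 'catalog.fits' and the coordinates are
    # placeholders):
    #
    #     >>> at = AstroTable('catalog.fits')
    #     >>> near = at.selectWhere('*', cone=(280.46, -32.89, 0.5))
    #     >>> near['separation']        # degrees, added by the cone selection
    #     >>> box = at.selectWhere('*', zone=(279.0, 282.0, -34.0, -32.0))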
class stats(object):
    @classmethod
    def has_nan(s, v):
        return bool(np.isnan(v).any())

    @classmethod
    def mean(s, v):
        return np.nanmean(v)

    @classmethod
    def max(s, v):
        return np.nanmax(v)

    @classmethod
    def min(s, v):
        return np.nanmin(v)

    @classmethod
    def std(s, v):
        return np.nanstd(v)

    @classmethod
    def var(s, v):
        return np.var(v)

    @classmethod
    def p16(s, v):
        try:
            return np.nanpercentile(v, 16)
        except AttributeError:
            return np.percentile(v, 16)

    @classmethod
    def p84(s, v):
        try:
            return np.nanpercentile(v, 84)
        except AttributeError:
            return np.percentile(v, 84)

    @classmethod
    def p50(s, v):
        try:
            return np.nanmedian(v)
        except AttributeError:
            return np.percentile(v, 50)
# =============================================================================
# Adding some plotting functions
# =============================================================================

try:
    import pylab as plt

    def plot_function(tab, fn, *args, **kwargs):
        """ Generate a plotting method of tab from a given function

        Parameters
        ----------
        tab: SimpleTable instance
            table instance

        fn: str or callable
            if str, will try a function in matplotlib
            if callable, calls the function directly

        xname: str
            expecting a column name from the table

        yname: str, optional
            if provided, another column to use for the plot

        onlywhere: sequence or str, optional
            if provided, selects only data with this condition
            the condition can be a ndarray slice or a string.
            When a string is given, the evaluation calls
            :func:`SimpleTable.where`

        ax: matplotlib.Axes instance
            if provided, make sure it uses the axis to do the plots if a mpl
            function is used.

        Returns
        -------
        r: object
            anything returned by the called function
        """
        if not hasattr(fn, '__call__'):
            ax = kwargs.pop('ax', None)
            if ax is None:
                ax = plt.gca()
            _fn = getattr(ax, fn, None)
            if _fn is None:
                raise AttributeError('function neither callable nor found in matplotlib')
        else:
            _fn = fn

        onlywhere = kwargs.pop('onlywhere', None)
        if isinstance(onlywhere, basestring):
            select = tab.where(onlywhere)
        else:
            select = onlywhere

        _args = ()
        for a in args:
            if (hasattr(a, '__iter__')):
                try:
                    b = tab[a]
                    if select is not None:
                        b = b.compress(select)
                    if (len(b.dtype) > 1):
                        b = list((b[k] for k in b.dtype.names))
                    _args += (b, )
                except Exception as e:
                    # fall back to passing the argument through unchanged
                    print(e)
                    _args += (a, )
            else:
                _args += (a, )

        return _fn(*_args, **kwargs)

    def attached_function(fn, doc=None, errorlevel=0):
        """ Declare a function as a method of the table class """

        def _fn(self, *args, **kwargs):
            try:
                return plot_function(self, fn, *args, **kwargs)
            except Exception as e:
                if errorlevel >= 1:
                    raise e

        if doc is not None:
            _fn.__doc__ = doc

        return _fn

    SimpleTable.plot_function = plot_function
    SimpleTable.plot = attached_function('plot', plt.plot.__doc__)
    SimpleTable.hist = attached_function('hist', plt.hist.__doc__)
    SimpleTable.hist2d = attached_function('hist2d', plt.hist2d.__doc__)
    SimpleTable.hexbin = attached_function('hexbin', plt.hexbin.__doc__)
    SimpleTable.scatter = attached_function('scatter', plt.scatter.__doc__)

    # newer versions of matplotlib
    if hasattr(plt, 'violinplot'):
        SimpleTable.violinplot = attached_function('violinplot',
                                                   plt.violinplot.__doc__)
    if hasattr(plt, 'boxplot'):
        SimpleTable.boxplot = attached_function('boxplot',
                                                plt.boxplot.__doc__)

except Exception as e:
    print(e)
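# Usage sketch (hedged: requires matplotlib/pylab; ``teff`` and ``V`` are
# placeholder columns):
#
#     >>> t.plot('teff', 'V', ',')   # forwards to plt.plot(t['teff'], t['V'], ',')
#     >>> t.hist('V', bins=50)       # forwards to plt.hist(t['V'], bins=50)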