Source code for stingray.io

import math
import copy
import os
import sys
import traceback
import warnings
from collections.abc import Iterable

import numpy as np
from astropy.io import fits
from astropy.table import Table
from astropy.logger import AstropyUserWarning
import matplotlib.pyplot as plt
from astropy.io import fits as pf

import stingray.utils as utils
from stingray.loggingconfig import setup_logger


from .utils import (
    assign_value_if_none,
    is_string,
    order_list_of_arrays,
    is_sorted,
    make_dictionary_lowercase,
)
from .gti import (
    get_gti_from_all_extensions,
    load_gtis,
    get_total_gti_length,
    split_gtis_by_exposure,
    cross_two_gtis,
)

from .mission_support import (
    read_mission_info,
    rough_calibration,
    get_rough_conversion_function,
    mission_specific_event_interpretation,
)

# Python 3
import pickle

# Optional-dependency flags: both are flipped below if ``h5py`` cannot be
# imported, so that DEFAULT_FORMAT is "hdf5" with h5py and "pickle" without.
_H5PY_INSTALLED = True
DEFAULT_FORMAT = "hdf5"

try:
    import h5py
except ImportError:
    _H5PY_INSTALLED = False
    DEFAULT_FORMAT = "pickle"

# HAS_128 records whether this NumPy build exposes the ``np.float128`` name
# (merely accessing the attribute raises AttributeError when it does not).
HAS_128 = True
try:
    np.float128
except AttributeError:  # pragma: no cover
    HAS_128 = False

# Module-level logger, created through stingray's ``setup_logger`` helper.
logger = setup_logger()



[docs]
def read_rmf(rmf_file):
    """Read the channel/energy boundaries stored in a response matrix file.

    .. note:: Preliminary: only the ``EBOUNDS`` extension is read.

    Parameters
    ----------
    rmf_file : str
        Path of the RMF file holding the calibration.

    Returns
    -------
    pis : array-like
        The PI channels (``CHANNEL`` column)
    e_mins : array-like
        The lower energy bound of each PI channel (``E_MIN`` column)
    e_maxs : array-like
        The upper energy bound of each PI channel (``E_MAX`` column)
    """
    with pf.open(rmf_file, checksum=True, memmap=False) as hdul:
        hdul.verify("warn")
        ebounds = hdul["EBOUNDS"].data
        # Copy the three columns into plain arrays while the file is open.
        pis, e_mins, e_maxs = [
            np.array(ebounds.field(colname)) for colname in ("CHANNEL", "E_MIN", "E_MAX")
        ]

    return pis, e_mins, e_maxs




[docs]
def pi_to_energy(pis, rmf_file):
    """Convert PI channels to energies using the EBOUNDS table of an RMF.

    Parameters
    ----------
    pis : array-like
        The channels to lookup in the rmf

    Other Parameters
    ----------------
    rmf_file : str
        The rmf file used to read the calibration.

    Returns
    -------
    es : np.ndarray of float
        One value per input channel: the mean of the lower and upper energy
        bound of the matching RMF channel. Entries whose channel is not
        listed in the RMF are left at zero.
    """
    channels, lower_bounds, upper_bounds = read_rmf(rmf_file)
    energies = np.zeros(len(pis), dtype=float)
    for idx, channel in enumerate(channels):
        matching = pis == channel
        if np.any(matching):
            # Mid-point of the energy range covered by this channel.
            energies[matching] = (lower_bounds[idx] + upper_bounds[idx]) / 2

    return energies




[docs]
def get_file_extension(fname):
    """Get the extension from the file name.

    If g-zipped (i.e. the name ends with '.gz'), '.gz' is appended to the
    extension of the underlying file.

    Parameters
    ----------
    fname : str
        The file name (with or without directories).

    Returns
    -------
    extension : str
        The extension, including the leading dot, or an empty string if the
        file name has no extension.

    Examples
    --------
    >>> get_file_extension('ciao.tar')
    '.tar'
    >>> get_file_extension('ciao.tar.gz')
    '.tar.gz'
    >>> get_file_extension('ciao.evt.gz')
    '.evt.gz'
    >>> get_file_extension('ciao.a.tutti.evt.gz')
    '.evt.gz'
    >>> get_file_extension('evt.evt')
    '.evt'
    """
    gz_suffix = ".gz"
    # Only a *trailing* '.gz' marks a compressed file. Work on suffixes rather
    # than on ``str.replace``, which would also remove every other occurrence
    # of the substring (e.g. 'evt.evt' used to give '.').
    if fname.endswith(gz_suffix):
        uncompressed_name = fname[: -len(gz_suffix)]
        compression = gz_suffix
    else:
        uncompressed_name = fname
        compression = ""

    return os.path.splitext(uncompressed_name)[1] + compression




[docs]
def high_precision_keyword_read(hdr, keyword):
    """Read a FITS header keyword, even when it is split in two parts.

    Some missions store a value as an integer part plus a fractional part,
    like

        MJDREF = MJDREFI + MJDREFF

    In that case this function returns the sum of the two. Otherwise, it
    returns the content of the single keyword.

    Parameters
    ----------
    hdr : dict_like
        The FITS header structure, or a dictionary

    keyword : str
        The key to read in the header

    Returns
    -------
    value : long double
        The value of the key, or ``None`` if neither the keyword nor its
        ``I``/``F`` pair is found in the header

    """
    # First choice: the keyword is stored as a single entry.
    try:
        single = hdr[keyword]
    except KeyError:
        pass
    else:
        return np.longdouble(single)

    # An 8-character keyword loses its last character to make room for the
    # I/F suffix.
    stem = keyword[:7] if len(keyword) == 8 else keyword
    try:
        total = np.longdouble(hdr[stem + "I"])
        total = total + np.longdouble(hdr[stem + "F"])
    except KeyError:
        return None
    return total



def _case_insensitive_search_in_list(string, list_of_strings):
    """Search for a string in a list of strings, in a case-insensitive way.

    Example
    -------
    >>> _case_insensitive_search_in_list("a", ["A", "b"])
    'A'
    >>> assert _case_insensitive_search_in_list("a", ["c", "b"]) is None
    """
    for s in list_of_strings:
        if string.lower() == s.lower():
            return s
    return None


def _get_additional_data(lctable, additional_columns, warn_if_missing=True):
    """Extract a set of extra columns from a FITS data table.

    Column names are matched ignoring case. A requested column that is not in
    the table is returned as an array of zeros with one entry per table row.

    Parameters
    ----------
    lctable: `astropy.io.fits.fitsrec.FITS_rec`
        Data table
    additional_columns: list of str
        List of column names to retrieve from the table. If ``None``, an
        empty dictionary is returned.

    Other parameters
    ----------------
    warn_if_missing: bool, default True
        Warn if a column is not found

    Returns
    -------
    additional_data: dict
        Dictionary associating to each additional column (keyed by the name
        that was requested) the content of the table.
    """
    collected = {}
    if additional_columns is None:
        return collected

    for requested in additional_columns:
        actual_name = _case_insensitive_search_in_list(requested, lctable._coldefs.names)
        if actual_name is None:
            if warn_if_missing:
                warnings.warn("Column " + requested + " not found")
            # Placeholder, so that the key is always present in the output.
            collected[requested] = np.zeros(len(lctable))
            continue
        collected[requested] = np.array(lctable.field(actual_name))

    return collected



[docs]
def get_key_from_mission_info(info, key, default, inst=None, mode=None):
    """Look up a header key or table column name in the mission database.

    Entries of the mission database have default values that can be
    overridden for a specific instrument or observing mode. The lookup is
    case-insensitive: the instrument-level entries (if any) are merged on top
    of the mission-level ones, then the mode-level entries (if any) on top of
    the result, and finally ``key`` is searched in the merged dictionary.

    Parameters
    ----------
    info : dict
        Nested dictionary containing all the information for a given mission.
        It can be nested, e.g. contain some info for a given instrument, and
        for each observing mode of that instrument.
    key : str
        The key to read from the info dictionary
    default : object
        The value returned when ``key`` is not found. It can be of any type,
        depending on the expected type for the entry.

    Other parameters
    ----------------
    inst : str
        Instrument
    mode : str
        Observing mode

    Returns
    -------
    retval : object
        The wanted entry from the info dictionary

    Examples
    --------
    >>> info = {'ecol': 'PI', "A": {"ecol": "BLA"}, "C": {"M1": {"ecol": "X"}}}
    >>> get_key_from_mission_info(info, "ecol", "BU", inst="A", mode=None)
    'BLA'
    >>> get_key_from_mission_info(info, "ecol", "BU", inst="B", mode=None)
    'PI'
    >>> get_key_from_mission_info(info, "ecol", "BU", inst="A", mode="M1")
    'BLA'
    >>> get_key_from_mission_info(info, "ecol", "BU", inst="C", mode="M1")
    'X'
    >>> get_key_from_mission_info(info, "ghghg", "BU", inst="C", mode="M1")
    'BU'
    """
    merged = make_dictionary_lowercase(info, recursive=True)
    wanted = key.lower()

    # Instrument first, then mode: each level, when present, overrides the
    # entries inherited from the level above and is then removed.
    for level in (inst, mode):
        if level is None:
            continue
        level = level.lower()
        if level in merged:
            overrides = merged[level]
            merged.update(overrides)
            merged.pop(level)

    return merged[wanted] if wanted in merged else default




[docs]
def lcurve_from_fits(
    fits_file,
    gtistring="GTI",
    timecolumn="TIME",
    ratecolumn=None,
    ratehdu=1,
    fracexp_limit=0.9,
    outfile=None,
    noclobber=False,
    outdir=None,
):
    """Load a lightcurve from a fits file.

    .. note ::
        FITS light curve handling is still under testing.
        Absolute times might be incorrect depending on the light curve format.

    Parameters
    ----------
    fits_file : str
        File name of the input light curve in FITS format

    Returns
    -------
    data : dict
        Dictionary containing all information needed to create a
        :class:`stingray.Lightcurve` object, with keys ``time``, ``counts``,
        ``err``, ``gti``, ``mjdref``, ``dt``, ``instr`` and ``header``.
        ``mjdref`` is ``None`` if no MJDREF could be read from the header.

    Other Parameters
    ----------------
    gtistring : str
        Name of the GTI extension in the FITS file
    timecolumn : str
        Name of the column containing times in the FITS file
    ratecolumn : str
        Name of the column containing rates in the FITS file. If None, the
        first of ``RATE``, ``RATE1``, ``COUNTS`` found in the table is used.
    ratehdu : str or int
        Name or index of the FITS extension containing the light curve
    fracexp_limit : float
        Minimum exposure fraction allowed
    outfile, noclobber, outdir : object
        Accepted for backward compatibility; not used by this function.

    Raises
    ------
    ValueError
        If TSTART is missing from the header (or TSTOP, when times are in
        days), or if no rate column is found.
    """
    warnings.warn(
        """WARNING! FITS light curve handling is still under testing.
        Absolute times might be incorrect."""
    )
    # TODO:
    # treat consistently TDB, UTC, TAI, etc. This requires some documentation
    # reading. For now, we assume TDB
    from astropy.io import fits as pf
    from astropy.time import Time
    import numpy as np
    from stingray.gti import create_gti_from_condition

    # The context manager closes the file also when reading fails midway.
    with pf.open(fits_file) as lchdulist:
        lctable = lchdulist[ratehdu].data
        header = lchdulist[ratehdu].header

        # Units of header keywords
        tunit = header["TIMEUNIT"]

        try:
            mjdref = high_precision_keyword_read(header, "MJDREF")
            mjdref = Time(mjdref, scale="tdb", format="mjd")
        except Exception:
            mjdref = None

        try:
            instr = header["INSTRUME"]
        except Exception:
            instr = "EXTERN"

        # ----------------------------------------------------------------
        # Trying to comply with all different formats of fits light curves.
        # It's a madness...
        try:
            tstart = high_precision_keyword_read(header, "TSTART")
            tstop = high_precision_keyword_read(header, "TSTOP")
        except Exception as exc:  # pragma: no cover
            raise ValueError("TSTART and TSTOP need to be specified") from exc
        # ``high_precision_keyword_read`` returns None instead of raising when
        # a keyword is missing. TSTART is always needed below; TSTOP is only
        # used when the times are expressed in days.
        if tstart is None or (tunit == "d" and tstop is None):
            raise ValueError("TSTART and TSTOP need to be specified")

        # For nulccorr lcs this would work

        timezero = high_precision_keyword_read(header, "TIMEZERO")
        # Sometimes timezero is "from tstart", sometimes it's an absolute time.
        # This tries to detect which case is this, and always consider it
        # referred to tstart
        timezero = assign_value_if_none(timezero, 0)

        # for lcurve light curves this should instead work
        if tunit == "d":
            # TODO:
            # Check this. For now, I assume TD (JD - 2440000.5).
            # This is likely wrong
            timezero = Time(2440000.5 + timezero, scale="tdb", format="jd")
            tstart = Time(2440000.5 + tstart, scale="tdb", format="jd")
            tstop = Time(2440000.5 + tstop, scale="tdb", format="jd")
            # if None, use NuSTAR default MJDREF
            mjdref = assign_value_if_none(
                mjdref,
                Time(np.longdouble("55197.00076601852"), scale="tdb", format="mjd"),
            )

            timezero = (timezero - mjdref).to("s").value
            tstart = (tstart - mjdref).to("s").value
            tstop = (tstop - mjdref).to("s").value

        if timezero > tstart:
            timezero -= tstart

        time = np.array(lctable.field(timecolumn), dtype=np.longdouble)
        if time[-1] < tstart:
            time += timezero + tstart
        else:
            time += timezero

        # A missing TIMEDEL yields None rather than an exception, so the
        # fallback on the median time difference is triggered explicitly.
        try:
            dt = high_precision_keyword_read(header, "TIMEDEL")
            if dt is not None and tunit == "d":
                dt *= 86400
        except Exception:
            dt = None
        if dt is None:
            warnings.warn(
                "Assuming that TIMEDEL is the median difference between the" " light curve times",
                AstropyUserWarning,
            )
            # Avoid NaNs
            good = time == time
            dt = np.median(np.diff(time[good]))

        # ----------------------------------------------------------------
        if ratecolumn is None:
            for name in ["RATE", "RATE1", "COUNTS"]:
                if name in lctable.names:
                    ratecolumn = name
                    break
            else:  # pragma: no cover
                raise ValueError("None of the accepted rate columns were found in the file")

        rate = np.array(lctable.field(ratecolumn), dtype=float)

        errorcolumn = "ERROR"
        if ratecolumn == "RATE1":
            errorcolumn = "ERROR1"

        try:
            rate_e = np.array(lctable.field(errorcolumn), dtype=np.longdouble)
        except Exception:
            rate_e = np.zeros_like(rate)

        # Rates are converted to counts per bin.
        if "RATE" in ratecolumn:
            rate *= dt
            rate_e *= dt

        try:
            fracexp = np.array(lctable.field("FRACEXP"), dtype=np.longdouble)
        except Exception:
            fracexp = np.ones_like(rate)

        # Bins are good when the rate is not NaN and the exposure is sufficient.
        good_intervals = (rate == rate) * (fracexp >= fracexp_limit)

        rate[good_intervals] /= fracexp[good_intervals]
        rate_e[good_intervals] /= fracexp[good_intervals]

        rate[~good_intervals] = 0

        # Best effort: if the GTI extension cannot be read, derive the GTIs
        # from the good bins.
        try:
            gtitable = lchdulist[gtistring].data
            gti_list = np.array(
                [[a, b] for a, b in zip(gtitable.field("START"), gtitable.field("STOP"))],
                dtype=np.longdouble,
            )
        except Exception:
            gti_list = create_gti_from_condition(time, good_intervals)

        header_string = header.tostring()

    res = {
        "time": time,
        "counts": rate,
        "err": rate_e,
        "gti": gti_list,
        # mjdref stays None when the header has no usable MJDREF
        "mjdref": mjdref.mjd if mjdref is not None else None,
        "dt": dt,
        "instr": instr,
        "header": header_string,
    }
    return res




[docs]
def load_events_and_gtis(
    fits_file,
    additional_columns=None,
    gtistring=None,
    gti_file=None,
    hduname=None,
    column=None,
):
    """Load event lists and GTIs from one or more files.

    Loads event list from HDU EVENTS of file fits_file, with Good Time
    intervals. Optionally, returns additional columns of data from the same
    HDU of the events.

    Parameters
    ----------
    fits_file : str
        Name of the FITS file containing the events

    Other parameters
    ----------------
    additional_columns: list of str, optional
        A list of keys corresponding to the additional columns to extract from
        the event HDU (ex.: ['PI', 'X']). The list is not modified.
    gtistring : str
        Comma-separated list of accepted GTI extensions (default GTI,STDGTI),
        with or without appended integer number denoting the detector
    gti_file : str, default None
        External GTI file
    hduname : str or int, default None
        Name of the HDU containing the event list. If None, the name comes
        from the mission database (falling back to "EVENTS"). If the HDU is
        not in the file, the first extension is used.
    column : str, default None
        The column containing the time values. If None, we use the name
        specified in the mission database, and if there is nothing there,
        "TIME"

    Returns
    -------
    retvals : Object with the following attributes:
        ev_list : array-like
            Event times in Mission Epoch Time
        gti_list: [[gti0_0, gti0_1], [gti1_0, gti1_1], ...]
            GTIs in Mission Epoch Time
        additional_data: dict
            A dictionary, where each key is the one specified in additional_colums.
            The data are an array with the values of the specified column in the
            fits file.
        t_start : float
            Start time in Mission Epoch Time
        t_stop : float
            Stop time in Mission Epoch Time
        pi_list : array-like
            Raw Instrument energy channels
        cal_pi_list : array-like
            Calibrated PI channels (those that can be easily converted to energy
            values, regardless of the instrument setup.)
        energy_list : array-like
            Energy of each photon in keV (only for NuSTAR, NICER, XMM)
        instr : str
            Name of the instrument (e.g. EPIC-pn or FPMA), lowercase, or
            ``None`` if the instrument keyword is not in the header
        mission : str
            Name of the mission (e.g. XMM or NuSTAR), lowercase
        mjdref : float
            MJD reference time for the mission
        header : str
            Full header of the FITS file, for debugging purposes
        detector_id : array-like, int
            Detector id for each photon (e.g. each of the CCDs composing XMM's or
            Chandra's instruments)
        ephem, timeref, timesys : str
            Lowercase content of the PLEPHEM, TIMEREF and TIMESYS keywords,
            or ``None`` when missing
    """
    from astropy.io import fits as pf

    # The context manager closes the file also when reading fails midway.
    with pf.open(fits_file) as hdulist:
        probe_header = hdulist[0].header
        # Let's look for TELESCOP here. This is the most common keyword to be
        # found in well-behaved headers. If it is not in header 0, I take this key
        # and the remaining information from header 1.
        if "TELESCOP" not in probe_header:
            probe_header = hdulist[1].header
        mission_key = "MISSION"
        if mission_key not in probe_header:
            mission_key = "TELESCOP"
        mission = probe_header[mission_key].lower()

        mission_specific_processing = mission_specific_event_interpretation(mission)
        if mission_specific_processing is not None:
            mission_specific_processing(hdulist)

        db = read_mission_info(mission)
        instkey = get_key_from_mission_info(db, "instkey", "INSTRUME")
        instr = mode = None
        if instkey in probe_header:
            instr = probe_header[instkey].strip()

        modekey = get_key_from_mission_info(db, "dmodekey", None, instr)
        if modekey is not None and modekey in probe_header:
            mode = probe_header[modekey].strip()

        if gtistring is None:
            gtistring = get_key_from_mission_info(db, "gti", "GTI,STDGTI", instr, mode)
        if hduname is None:
            hduname = get_key_from_mission_info(db, "events", "EVENTS", instr, mode)

        if hduname not in hdulist:
            warnings.warn(f"HDU {hduname} not found. Trying first extension")
            hduname = 1

        datatable = hdulist[hduname].data
        header = hdulist[hduname].header

        ephem = timeref = timesys = None

        if "PLEPHEM" in header:
            # For the rare cases where this is a number, e.g. 200, I add `str`
            # It's supposed to be a string.
            # NOTE: ``lstrip`` removes any leading J, P, L or - character, not
            # the literal "JPL-" prefix.
            ephem = str(header["PLEPHEM"]).strip().lstrip("JPL-").lower()
        if "TIMEREF" in header:
            timeref = header["TIMEREF"].strip().lower()
        if "TIMESYS" in header:
            timesys = header["TIMESYS"].strip().lower()

        if column is None:
            column = get_key_from_mission_info(db, "time", "TIME", instr, mode)
        ev_list = np.array(datatable.field(column), dtype=np.longdouble)

        detector_id = None
        ckey = get_key_from_mission_info(db, "ccol", "NONE", instr, mode)
        if ckey != "NONE" and ckey in datatable.columns.names:
            detector_id = datatable.field(ckey)

        det_number = None if detector_id is None else list(set(detector_id))

        timezero = np.longdouble(0.0)
        if "TIMEZERO" in header:
            timezero = np.longdouble(header["TIMEZERO"])

        ev_list += timezero

        # Header values take precedence; the event times are only indexed when
        # the keyword is missing, so that an empty table with TSTART/TSTOP in
        # the header does not fail here.
        if "TSTART" in header:
            t_start = np.longdouble(header["TSTART"])
        else:
            t_start = ev_list[0]
        if "TSTOP" in header:
            t_stop = np.longdouble(header["TSTOP"])
        else:
            t_stop = ev_list[-1]

        mjdref = np.longdouble(high_precision_keyword_read(header, "MJDREF"))

        # Read and handle GTI extension
        accepted_gtistrings = gtistring.split(",")

        if gti_file is None:
            # Select first GTI with accepted name
            try:
                gti_list = get_gti_from_all_extensions(
                    hdulist,
                    accepted_gtistrings=accepted_gtistrings,
                    det_numbers=det_number,
                )
            except Exception as e:  # pragma: no cover
                warnings.warn(
                    (
                        f"No valid GTI extensions found. \nError: {str(e)}\n"
                        "GTIs will be set to the entire time series."
                    ),
                    AstropyUserWarning,
                )
                gti_list = np.array([[t_start, t_stop]], dtype=np.longdouble)
        else:
            gti_list = load_gtis(gti_file, gtistring)

        pi_col = get_key_from_mission_info(db, "ecol", "PI", instr, mode)
        # Work on a private copy: the list passed by the caller is not touched.
        columns_to_read = [] if additional_columns is None else list(additional_columns)
        if pi_col not in columns_to_read:
            columns_to_read.append(pi_col)
        # If data were already calibrated, use this!
        additional_data = _get_additional_data(datatable, columns_to_read)
        if "energy" not in columns_to_read:
            additional_data.update(
                _get_additional_data(datatable, ["energy"], warn_if_missing=False)
            )

    # Sort event list
    if not is_sorted(ev_list):
        warnings.warn("Warning: input data are not sorted. Sorting them for you.")
        order = np.argsort(ev_list)
        ev_list = ev_list[order]
        if detector_id is not None:
            detector_id = detector_id[order]

        additional_data = order_list_of_arrays(additional_data, order)

    pi = additional_data[pi_col].astype(np.float32)
    cal_pi = pi

    # EventReadOutput() is an empty class. We will assign a number of attributes to
    # it, like the arrival times of photons, the energies, and some information
    # from the header.
    returns = EventReadOutput()

    returns.ev_list = ev_list
    returns.gti_list = gti_list
    returns.pi_list = pi
    returns.cal_pi_list = cal_pi

    if "energy" in additional_data and np.any(additional_data["energy"] > 0.0):
        returns.energy_list = additional_data["energy"]
    else:
        try:
            func = get_rough_conversion_function(
                mission, instrument=instr, epoch=t_start / 86400 + mjdref
            )
            returns.energy_list = func(cal_pi, detector_id=detector_id)
            logger.info(
                f"A default calibration was applied to the {mission} data. "
                "See io.rough_calibration for details. "
                "Use the `rmf_file` argument in `EventList.read`, or calibrate with "
                "`EventList.convert_pi_to_energy(rmf_file)`, if you want to apply a specific "
                "response matrix"
            )
        except ValueError:
            returns.energy_list = None
    # ``instr`` is None when the instrument keyword is missing from the header
    returns.instr = instr.lower() if instr is not None else None
    returns.mission = mission.lower()
    returns.mjdref = mjdref
    returns.header = header.tostring()
    returns.additional_data = additional_data
    returns.t_start = t_start
    returns.t_stop = t_stop
    returns.detector_id = detector_id
    returns.ephem = ephem
    returns.timeref = timeref
    returns.timesys = timesys

    return returns



class EventReadOutput:
    """Empty container: the results of reading an event file are attached to
    its instances as plain attributes."""

    def __init__(self):
        """Create the container, without any attribute."""


class FITSTimeseriesReader(object):
    main_array_attr = "time"

    def __init__(
        self,
        fname,
        output_class=None,
        force_hduname=None,
        gti_file=None,
        gtistring=None,
        additional_columns=None,
        data_kind="events",
    ):
        self.fname = fname
        self._meta_attrs = []
        self.gtistring = gtistring
        self.output_class = output_class
        self.additional_columns = additional_columns
        if "EventList" in str(output_class) or data_kind.lower() in ["events", "times"]:
            self._initialize_header_events(fname, force_hduname=force_hduname)
        else:
            raise NotImplementedError(
                "Only events are supported by FITSTimeseriesReader at the moment. "
                f"{data_kind} is an unknown data kind."
            )
        self.data_kind = data_kind
        if additional_columns is None and self.detector_key != "NONE":
            additional_columns = [self.detector_key]
        elif self.detector_key != "NONE":
            additional_columns.append(self.detector_key)
        self.data_hdu = fits.open(self.fname)[self.hduname]
        self.gti_file = gti_file
        self._read_gtis(self.gti_file)

    @property
    def time(self):
        return self[:].time

    def meta_attrs(self):
        return self._meta_attrs

    def _add_meta_attr(self, name, value):
        """Add a meta attribute to the object."""
        if name not in self._meta_attrs:
            self._meta_attrs.append(name)
        setattr(self, name, value)

    @property
    def exposure(self):
        """
        Total exposure of the time series, i.e. the summed length of the GTIs.

        Returns
        -------
        total_exposure : float
            The total length of the intervals stored in ``self.gti``.
        """
        good_time_intervals = self.gti
        return get_total_gti_length(good_time_intervals)

    def __getitem__(self, index):
        """Return an element or a slice of the object, e.g. ``ts[1]`` or ``ts[1:2]."""

        data = self.data_hdu.data[index]

        return self.transform_slice(data)

    def transform_slice(self, data):
        # Here there will be some logic to understand whether transfomring to events or something else

        if self.data_kind == "times":
            return data[self.time_column][:] + self.timezero
        if self.output_class is None:
            return data
        if self.data_kind == "events":
            return self._transform_slice_into_events(data)

    def _transform_slice_into_events(self, data):
        """Take a slice of data from a FITS event file and make it a StingrayTimeseries.

        Data taken from a FITS file will typically be a Numpy record array. This method
        tries to interpret the information contained in the record array based on what
        we know of the mission and the instrument. For sure, there will be a TIME column
        that will become the ``time`` array of the timeseries object. If there is a PI/PHA
        column, it will become the ``pi`` array, and if we know the conversion law for the mission,
        this will also be converted to energy. If there is an ENERGY column, it will directly
        be loaded into the energy column.
        Additional meta (e.g. GTIs, MJDREF, etc.) information will also be added to the object.

        Parameters
        ----------
        data : np.recarray
            The slice of data to transform

        Returns
        -------
        new_ts : any StingrayTimeseries subclass
            The transformed timeseries object. It will typically be an ``EventList`` object,
            but the user can change this by specifying the ``output_class`` parameter in the
            constructor of the reader.

        """
        columns = [self.time_column]
        for col in self.pi_column, self.energy_column:
            if col is not None:
                columns.append(col)
        new_ts = self.output_class()
        if self._mission_specific_processing is not None:
            data = self._mission_specific_processing(data, header=self.header, hduname=self.hduname)

        # Set the times
        setattr(
            new_ts,
            self.main_array_attr,
            data[self.time_column][:] + self.timezero,
        )
        # Get conversion function PI->Energy
        try:
            pi_energy_func = get_rough_conversion_function(
                self.mission,
                instrument=self.instr,
                epoch=self.t_start / 86400 + self.mjdref,
            )
        except ValueError:
            pi_energy_func = None

        if self.energy_column in data.dtype.names:
            new_ts.energy = data[self.energy_column]
        elif self.pi_column in data.dtype.names:
            new_ts.pi = data[self.pi_column]
            if pi_energy_func is not None:
                new_ts.energy = pi_energy_func(new_ts.pi)

        det_numbers = None
        if self.detector_key is not None and self.detector_key in data.dtype.names:
            new_ts.detector_id = data[self.detector_key]
            det_numbers = list(set(new_ts.detector_id))
            self._read_gtis(self.gti_file, det_numbers=det_numbers)

        if self.additional_columns is not None:
            for col in self.additional_columns:
                if col == self.detector_key:
                    continue
                if col in data.dtype.names:
                    setattr(new_ts, col.lower(), data[col])

        for attr in self.meta_attrs():
            local_value = getattr(self, attr)
            if attr in ["t_start", "t_stop", "gti"] and local_value is not None:
                setattr(new_ts, attr, local_value + self.timezero)
            else:
                setattr(new_ts, attr, local_value)

        return new_ts

    def _initialize_header_events(self, fname, force_hduname=None):
        """Read the header of the FITS file and set the relevant attributes.

        When possibile, some mission-specific information is read from the keywords and
        extension names found in ``xselect.mdb``.

        All values are stored through ``_add_meta_attr``: ``mission``,
        ``_mission_specific_processing``, ``instr``, ``mode``, ``gtistring``,
        ``hduname``, ``obsid`` (only if OBS_ID is in the header), ``header``,
        ``nphot``, ``ephem``, ``timeref``, ``timesys``, ``timezero``,
        ``t_start``, ``t_stop``, ``time_column``, ``detector_key``,
        ``mjdref``, ``pi_column`` and ``energy_column``.

        Parameters
        ----------
        fname : str
            The name of the FITS file to read

        Other parameters
        ----------------
        force_hduname : str or int, default None
            If not None, the name of the HDU to read. If None, an extension called
            EVENTS or the first extension.

        Raises
        ------
        KeyError
            If no HDU contains a MISSION or TELESCOP keyword.
        """
        # Only header information and column names are needed here: the file
        # is closed as soon as they are read, also in case of errors.
        with fits.open(fname) as hdulist:
            probe_header = None
            if not force_hduname:
                for hdu in hdulist:
                    if "TELESCOP" in hdu.header or "MISSION" in hdu.header:
                        probe_header = hdu.header
                        break
            else:
                probe_header = hdulist[force_hduname].header
            if probe_header is None:
                raise KeyError(f"No HDU of {fname} contains a MISSION or TELESCOP keyword")

            # We need the minimal information to read the mission database.
            # That is, the name of the mission/telescope, the instrument and,
            # if available, the observing mode.
            mission_key = "MISSION"
            if mission_key not in probe_header:
                mission_key = "TELESCOP"
            self._add_meta_attr("mission", probe_header[mission_key].lower())
            self._add_meta_attr(
                "_mission_specific_processing",
                mission_specific_event_interpretation(self.mission),
            )

            # Now, we read the mission info, and we try to get the relevant
            # information from the header using the mission-specific keywords.
            db = read_mission_info(self.mission)
            instkey = get_key_from_mission_info(db, "instkey", "INSTRUME")
            instr = mode = None
            if instkey in probe_header:
                instr = probe_header[instkey].strip()

            modekey = get_key_from_mission_info(db, "dmodekey", None, instr)
            if modekey is not None and modekey in probe_header:
                mode = probe_header[modekey].strip()
            self._add_meta_attr("instr", instr)
            self._add_meta_attr("mode", mode)

            gtistring = self.gtistring

            if self.gtistring is None:
                gtistring = get_key_from_mission_info(db, "gti", "GTI,STDGTI", instr, self.mode)
            self._add_meta_attr("gtistring", gtistring)

            if force_hduname is None:
                hduname = get_key_from_mission_info(db, "events", "EVENTS", instr, self.mode)
            else:
                hduname = force_hduname

            # If the EVENT/``force_hduname`` extension is not found, try the first extension
            # which is usually the one containing the data
            if hduname not in hdulist:
                warnings.warn(f"HDU {hduname} not found. Trying first extension")
                hduname = 1
            self._add_meta_attr("hduname", hduname)

            header = hdulist[hduname].header
            if "OBS_ID" in header:
                self._add_meta_attr("obsid", header["OBS_ID"])

            # self.header has to be a string, for backwards compatibility and... for convenience!
            # No need to cope with dicts working badly with Netcdf, for example. The header
            # can be saved back and forth to files and be interpreted through
            # fits.Header.fromstring(self.header) when needed.
            self._add_meta_attr("header", header.tostring())
            self._add_meta_attr("nphot", header["NAXIS2"])

            # These are the important keywords for timing.
            ephem = timeref = timesys = None
            if "PLEPHEM" in header:
                # For the rare cases where this is a number, e.g. 200, I add `str`
                # It's supposed to be a string.
                # NOTE: ``lstrip`` removes any leading J, P, L or - character,
                # not the literal "JPL-" prefix.
                ephem = str(header["PLEPHEM"]).strip().lstrip("JPL-").lower()
            if "TIMEREF" in header:
                timeref = header["TIMEREF"].strip().lower()
            if "TIMESYS" in header:
                timesys = header["TIMESYS"].strip().lower()
            self._add_meta_attr("ephem", ephem)
            self._add_meta_attr("timeref", timeref)
            self._add_meta_attr("timesys", timesys)

            timezero = np.longdouble(0.0)
            if "TIMEZERO" in header:
                timezero = np.longdouble(header["TIMEZERO"])
            # t_start and t_stop stay None when the keywords are missing
            t_start = t_stop = None
            if "TSTART" in header:
                t_start = np.longdouble(header["TSTART"])
            if "TSTOP" in header:
                t_stop = np.longdouble(header["TSTOP"])
            self._add_meta_attr("timezero", timezero)
            self._add_meta_attr("t_start", t_start)
            self._add_meta_attr("t_stop", t_stop)

            self._add_meta_attr(
                "time_column",
                get_key_from_mission_info(db, "time", "TIME", instr, mode),
            )

            self._add_meta_attr(
                "detector_key",
                get_key_from_mission_info(db, "ccol", "NONE", instr, mode),
            )

            self._add_meta_attr(
                "mjdref", np.longdouble(high_precision_keyword_read(header, "MJDREF"))
            )

            column_names = hdulist[self.hduname].data.columns.names

            # Try to get the information needed to calculate the event energy. We start from the
            # PI column
            default_pi_column = get_key_from_mission_info(db, "ecol", "PI", instr, self.mode)
            if default_pi_column not in column_names:
                default_pi_column = None
            self._add_meta_attr("pi_column", default_pi_column)

            # If a column named "energy" is found, we read it and assume the energy conversion
            # is already done.
            if "energy" in [val.lower() for val in column_names]:
                energy_column = "energy"
            else:
                energy_column = None
            self._add_meta_attr("energy_column", energy_column)

    def _read_gtis(self, gti_file=None, det_numbers=None):
        """Load the good time intervals and store them as the ``gti`` meta attribute.

        Other Parameters
        ----------------
        gti_file : str, default None
            If given, GTIs are loaded from this file through ``load_gtis`` (using
            ``self.gtistring``) instead of being searched in ``self.fname``.
        det_numbers : list, default None
            Detector numbers forwarded to ``get_gti_from_all_extensions``. When
            ``self.detector_key`` is a column of the data extension, this argument is
            replaced by the distinct values found in the first 100 rows of that column.
        """
        # Some missions (e.g. XMM) need the detector number to locate the GTI
        # extensions. A small sample of rows is probed, hoping it is representative
        # of the detectors present in the file.
        det_key = self.detector_key
        if det_key is not None:
            with fits.open(self.fname) as hdulist:
                table = hdulist[self.hduname].data
                if det_key in table.dtype.names:
                    det_numbers = list(set(table[:100][det_key]))

        gti_extension_names = self.gtistring.split(",")

        # An explicit GTI file takes precedence over the extensions of the data file
        if gti_file is not None:
            self._add_meta_attr("gti", load_gtis(gti_file, self.gtistring))
            return

        # Otherwise, look for GTI extensions with an accepted name in the data file
        try:
            found_gtis = get_gti_from_all_extensions(
                self.fname,
                accepted_gtistrings=gti_extension_names,
                det_numbers=det_numbers,
            )
        except Exception as exc:  # pragma: no cover
            # Best effort: with no usable GTI extension, ``gti`` is stored as None
            found_gtis = None
            warnings.warn(
                (
                    f"No valid GTI extensions found. \nError: {str(exc)}\n"
                    "GTIs will be set to the entire time series."
                ),
            )

        self._add_meta_attr("gti", found_gtis)

    def _get_idx_from_time_range(self, start, stop):
        """Find the row indices of the events falling inside a time range.

        Reading the whole time column and running ``np.searchsorted`` on it could
        easily fill up the memory. This method works in two steps instead:

        1. ``self._trace_nphots_in_file`` provides a coarse grid of times and the
           corresponding row indices in the file;
        2. only the rows bracketing the requested range (with a margin of two grid
           points per side) are read, and ``np.searchsorted`` is run on them. The
           resulting indices are then offset by the first row that was read.

        Parameters
        ----------
        start : float
            Start time of the interval
        stop : float
            Stop time of the interval

        Returns
        -------
        lower_edge : int
            Index of the first photon in the requested time range
        upper_edge : int
            Index of the last photon in the requested time range
        """
        # The grid is made fine enough that one grid step is comparable to the
        # length of the requested interval
        n_grid_points = int(self.exposure // (stop - start)) + 2
        time_grid, idx_grid = self._trace_nphots_in_file(nedges=n_grid_points)

        # Bracket the interval on the grid, widening by two points on each side
        # while staying inside the grid
        grid_lo = max(0, np.searchsorted(time_grid, start, side="left") - 2)
        grid_hi = min(time_grid.size - 1, np.searchsorted(time_grid, stop, side="right") + 2)

        first_row = idx_grid[grid_lo]
        last_row = idx_grid[grid_hi]

        # Sanity checks: the bracket must fully contain the requested interval
        start_margin = start - time_grid[grid_lo]
        stop_margin = time_grid[grid_hi] - stop
        assert start_margin >= 0, f"Start: {start}; {start_margin} > 0"
        assert stop_margin >= 0, f"Stop: {stop}; {stop_margin} < 0"

        with fits.open(self.fname) as hdulist:
            bracket_times = hdulist[self.hduname].data[self.time_column][first_row : last_row + 1]
            lower_edge, upper_edge = np.searchsorted(bracket_times, [start, stop])

        # Searchsorted finds the first number above stop. We want the last number below stop!
        upper_edge -= 1

        return lower_edge + first_row, upper_edge + first_row

    def apply_gti_lists(self, new_gti_lists, root_file_name=None, fmt=DEFAULT_FORMAT):
        """Split the event list into different chunks, each with a different GTI.

        This method is a generator: nothing is read or written until it is iterated.

        If the first element of ``new_gti_lists`` matches ``self.gti`` (same number of
        intervals, all boundaries equal within 1e-3), the whole data set (``self[:]``)
        is produced once and the remaining elements are ignored.

        Parameters
        ----------
        new_gti_lists : list of lists
            A list of lists of GTIs. Each sublist should contain a list of GTIs
            for a new file. Empty sublists are skipped. If ``new_gti_lists`` itself
            is empty, nothing is yielded.

        Other Parameters
        ----------------
        root_file_name : str, default None
            The root name of the output files. The file name will be appended with
            "_00", "_01", etc. The number is the position of the GTI list inside
            ``new_gti_lists``, so skipped (empty) sublists leave gaps in the numbering.
            If None, the data objects are yielded instead of writing the files.
        fmt : str
            The format of the output files. Default is 'hdf5'. A leading dot is
            ignored when building the file extension.

        Yields
        ------
        output : str or object
            The name of each output file if ``root_file_name`` is given, otherwise
            the object obtained by slicing ``self`` for each GTI list.
        """
        # Nothing to split. Without this guard, ``new_gti_lists[0]`` below would
        # raise an IndexError on empty input.
        if len(new_gti_lists) == 0:
            return

        extension = fmt.lstrip(".")

        if len(new_gti_lists[0]) == len(self.gti) and np.all(
            np.abs(np.asanyarray(new_gti_lists[0]).flatten() - self.gti.flatten()) < 1e-3
        ):
            # The first GTI list is the same as the current one: no splitting needed
            ev = self[:]
            if root_file_name is None:
                yield ev
            else:
                output_file = root_file_name + "_00." + extension
                ev.write(output_file, fmt=fmt)
                yield output_file
            return

        for i, gti in enumerate(new_gti_lists):
            if len(gti) == 0:
                continue
            gti = np.asarray(gti)
            # Only the rows between the start of the first GTI and the end of the
            # last one are selected
            lower_edge, upper_edge = self._get_idx_from_time_range(gti[0, 0], gti[-1, 1])

            ev = self[lower_edge : upper_edge + 1]
            if hasattr(ev, "gti"):
                ev.gti = gti

            if root_file_name is None:
                yield ev
                continue

            new_file = root_file_name + f"_{i:02d}." + extension
            logger.info(f"Writing {new_file}")
            ev.write(new_file, fmt=fmt)
            yield new_file

    def _trace_nphots_in_file(self, nedges=1001):
        """Trace the number of photons as time advances in the file.

        A set of rows, evenly spaced in row number, is sampled from the data
        extension, and the time of each sampled row is recorded. This is a quick way
        to map the distribution of photons in time without reading the entire file;
        the map can then be used to access only the wanted data.

        The result is cached in ``self._time_edges`` and ``self._idx_edges`` and
        reused by later calls asking for no more edges than the cached ones.

        Other Parameters
        ----------------
        nedges : int
            The number of time edges to trace. Default is 1001. The number actually
            used is capped to ``NAXIS2 // 10 + 2``, where ``NAXIS2`` is the number of
            rows of the data extension.

        Returns
        -------
        time_edges : np.ndarray
            The time edges. The first and last values are extended, if needed, to
            include the minimum and maximum of ``self.gti``.
        idx_edges : np.ndarray
            The index edges, i.e. the row number of each sampled time
        """

        if hasattr(self, "_time_edges") and len(self._time_edges) >= nedges:
            return self._time_edges, self._idx_edges

        with fits.open(self.fname) as hdul:
            # Read from the same extension and time column that
            # ``_get_idx_from_time_range`` later indexes with these row numbers,
            # rather than assuming extension 1 and a column called "TIME".
            data_hdu = hdul[self.hduname]
            size = data_hdu.header["NAXIS2"]
            nedges = min(nedges, size // 10 + 2)

            idx_edges = np.linspace(0, size - 1, nedges).astype(int)
            # Fetch the column once and pick all the sampled rows together
            all_times = data_hdu.data[self.time_column]
            time_edges = np.array(all_times[idx_edges], dtype=float)

            # Make sure the grid covers the full extent of the GTIs
            mingti, maxgti = np.min(self.gti), np.max(self.gti)
            if time_edges[0] > mingti:
                time_edges[0] = mingti
            if time_edges[-1] < maxgti:
                time_edges[-1] = maxgti

        self._time_edges, self._idx_edges = time_edges, idx_edges

        return time_edges, idx_edges

    def split_by_number_of_samples(self, nsamples, root_file_name=None, fmt=DEFAULT_FORMAT):
        """Split the event list into different files, each with approx. the given no. of photons.

        The number of chunks is ``self.nphot / nsamples`` rounded to the nearest
        integer (at least one). The total exposure is divided evenly among the chunks
        and the GTIs are split accordingly with ``split_gtis_by_exposure``; the actual
        work is delegated to ``apply_gti_lists``.

        Parameters
        ----------
        nsamples : int
            The number of photons in each output file.

        Other Parameters
        ----------------
        root_file_name : str, default None
            The root name of the output files. The file name will be appended with
            "_00", "_01", etc.
            If None, the data objects are yielded instead of writing the files.
        fmt : str
            The format of the output files. Default is 'hdf5'.

        Returns
        -------
        output : generator
            The generator returned by ``apply_gti_lists``. It yields the output file
            names if ``root_file_name`` is given, the data chunks otherwise.
        """
        # At least one interval: when ``nsamples`` is much larger than the number of
        # photons, the ratio rounds to zero and the division below would fail.
        n_intervals = max(1, int(np.rint(self.nphot / nsamples)))
        exposure_per_interval = self.exposure / n_intervals
        new_gti_lists = split_gtis_by_exposure(self.gti, exposure_per_interval)

        return self.apply_gti_lists(new_gti_lists, root_file_name=root_file_name, fmt=fmt)

    def filter_at_time_intervals(
        self, time_intervals, root_file_name=None, fmt=DEFAULT_FORMAT, check_gtis=True
    ):
        """Select the data falling inside each of the given time intervals.

        Each interval is turned into a GTI list, and all the lists are passed to
        ``apply_gti_lists``.

        Parameters
        ----------
        time_intervals : 2-d float array
            List of time intervals of the form ``[[time0_0, time0_1], [time1_0, time1_1], ...]``.
            A single interval ``[time0, time1]`` is also accepted.

        Other Parameters
        ----------------
        root_file_name : str, default None
            The root name of the output files. The file name will be appended with
            "_00", "_01", etc.
            If None, the data objects are yielded instead of writing the files.
        fmt : str
            The format of the output files. Default is 'hdf5'.
        check_gtis : bool, default True
            If True, each interval is crossed with ``self.gti`` through
            ``cross_two_gtis``. If False, each interval is used as it is.

        Returns
        -------
        output : generator
            The generator returned by ``apply_gti_lists``.
        """
        # A one-dimensional input is a single interval: wrap it in a list
        if np.ndim(time_intervals) == 1:
            intervals = [time_intervals]
        else:
            intervals = time_intervals

        gti_lists = []
        for interval in intervals:
            if check_gtis:
                gti_lists.append(cross_two_gtis(self.gti, [interval]))
            else:
                gti_lists.append(np.asarray([interval]))

        return self.apply_gti_lists(gti_lists, root_file_name=root_file_name, fmt=fmt)



[docs]
def mkdir_p(path):  # pragma: no cover
    """Create a directory, doing nothing if it already exists.

    Missing intermediate directories are created as well.

    Parameters
    ----------
    path : str
        The path to the directory to be created
    """
    from os import makedirs

    makedirs(path, exist_ok=True)




[docs]
def read_header_key(fits_file, key, hdu=1):
    """Read the header key ``key`` from HDU ``hdu`` of the file ``fits_file``.

    Parameters
    ----------
    fits_file: str
        The file name and absolute path to the event file.

    key: str
        The keyword to be read

    Other Parameters
    ----------------
    hdu : int
        Index of the HDU extension from which the header key to be read.

    Returns
    -------
    value : object
        The value stored under ``key`` in ``fits_file``, or an empty string if the
        lookup raises a ``KeyError``.
    """
    # The context manager closes the file even when the lookup fails with an
    # exception other than KeyError (e.g. an out-of-range HDU index).
    with fits.open(fits_file, ignore_missing_end=True) as hdulist:
        try:
            value = hdulist[hdu].header[key]
        except KeyError:  # pragma: no cover
            value = ""
    return value




[docs]
def ref_mjd(fits_file, hdu=1):
    """Read ``MJDREFF``, ``MJDREFI`` or, if failed, ``MJDREF``, from the FITS header.

    The keyword is read through ``high_precision_keyword_read``.

    Parameters
    ----------
    fits_file : str
        The file name and absolute path to the event file. If a non-string iterable
        is passed, its first element is used.

    Other Parameters
    ----------------
    hdu : int
        Index of the HDU extension from which the header key to be read.

    Returns
    -------
    mjdref : numpy.longdouble
        the reference MJD, as returned by ``high_precision_keyword_read``
    """

    if isinstance(fits_file, Iterable) and not is_string(fits_file):  # pragma: no cover
        fits_file = fits_file[0]
        logger.info("opening %s", fits_file)

    # The context manager closes the file even if reading the keyword raises
    with fits.open(fits_file, ignore_missing_end=True) as hdulist:
        ref_mjd_val = high_precision_keyword_read(hdulist[hdu].header, "MJDREF")

    return ref_mjd_val




[docs]
def common_name(str1, str2, default="common"):
    """Strip two strings of the letters not in common.

    The strings are compared position by position, and only the characters that are
    equal at the same position are kept. Filenames must be of same length and only
    differ by a few letters.

    Parameters
    ----------
    str1 : str
    str2 : str

    Other Parameters
    ----------------
    default : str
        The string to return if the two strings have different lengths or if
        ``common_str`` is empty

    Returns
    -------
    common_str : str
        A string containing the parts of the two names in common, without leading
        and trailing underscores and dashes

    Examples
    --------
    >>> common_name("strAfpma", "strBfpmb")
    'strfpm'
    >>> common_name("strAfpma", "strBfpmba")
    'common'
    >>> common_name("a_-x", "a_-y")
    'a'
    """
    if len(str1) != len(str2):
        return default

    common_str = "".join(c1 for c1, c2 in zip(str1, str2) if c1 == c2)

    # Remove leading and trailing underscores and dashes, in whatever order they
    # appear. Chaining single-character strips would leave some of them behind,
    # e.g. the underscore in "name_-".
    common_str = common_str.strip("_-")

    if not common_str:
        common_str = default
    logger.debug("common_name: %s %s -> %s", str1, str2, common_str)
    return common_str




[docs]
def split_numbers(number, shift=0):
    """
    Split high precision number(s) into doubles.

    The number is split into its integer and fractional parts. You can specify the
    number of shifts to move the decimal point before splitting.

    The split is done with ``numpy.modf`` on the input as it is, and each part is
    converted to double only afterwards. The input is never modified.

    Parameters
    ----------
    number: long double, or array-like of long doubles
        The input high precision number(s) to be split

    Other parameters
    ----------------
    shift: integer
        Move the cut by `shift` decimal points to the right (left if negative)

    Returns
    -------
    number_I: double
        First part of high precision number

    number_F: double
        Second part of high precision number

    Examples
    --------
    >>> n = 12.34
    >>> i, f = split_numbers(n)
    >>> assert i == 12
    >>> assert np.isclose(f, 0.34)
    >>> assert np.allclose(split_numbers(n, 2), (12.34, 0.0))
    >>> assert np.allclose(split_numbers(n, -1), (10.0, 2.34))
    >>> arr = np.array([12.34, 5.5])
    >>> ints, fracs = split_numbers(arr, 2)
    >>> assert np.allclose(arr, [12.34, 5.5])
    """
    factor = 10**shift

    # Out-of-place product: an in-place ``*=`` would rescale the caller's array, and
    # would fail on integer arrays when the factor is fractional.
    scaled = np.asanyarray(number) * factor

    # ``np.modf`` works on scalars and arrays alike and returns (fractional, integer)
    number_F, number_I = np.modf(scaled)

    return np.double(number_I) / factor, np.double(number_F) / factor




[docs]
def savefig(filename, **kwargs):
    """
    Save a figure plotted by ``matplotlib``.

    Note : This function is supposed to be used after the ``plot``
    function. Otherwise it will save a blank image with no plot. A warning
    is issued through ``utils.simon`` if no figure is open when this
    function is called; the file is saved in any case.

    Parameters
    ----------
    filename : str
        The name of the image file. Extension must be specified in the
        file name. For example filename with `.png` extension will give a
        rasterized image while ``.pdf`` extension will give a vectorized
        output.

    kwargs : keyword arguments
        Keyword arguments to be passed to ``savefig`` function of
        ``matplotlib.pyplot``. For example use `bbox_inches='tight'` to
        remove the undesirable whitespace around the image.
    """

    # Look for any open figure, not just figure number 1: the plot to be saved
    # might well live in a figure with a different number.
    if not plt.get_fignums():
        utils.simon(
            "use ``plot`` function to plot the image first and "
            "then use ``savefig`` to save the figure."
        )

    plt.savefig(filename, **kwargs)



def _can_save_longdouble(probe_file: str, fmt: str) -> bool:
    """Check if a given file format can save tables with longdoubles.

    A small table with a ``numpy.float128`` column is written to ``probe_file``. If
    this works, the probe file is deleted and True is returned. If a ``ValueError``
    mentioning ``float128`` is raised, a warning is issued and False is returned.
    Any other ``ValueError`` is raised again, because it would mean there are
    larger problems that need to be solved.

    Where ``numpy.float128`` is not defined, no probing is done and True is returned.

    Parameters
    ----------
    probe_file : str
        The name of the file to be used for probing
    fmt : str
        The format to be used for probing, in the ``format`` argument of ``Table.write``

    Returns
    -------
    yes_it_can : bool
        Whether the format can save longdouble columns
    """
    if not HAS_128:  # pragma: no cover
        # There are no known issues with saving longdoubles where numpy.float128 is not defined
        return True

    try:
        probe_values = np.arange(0, 3, 1.212314).astype(np.float128)
        Table({"a": probe_values}).write(probe_file, format=fmt, overwrite=True)
        # The probe file is only needed for the check
        os.unlink(probe_file)
    except ValueError as exc:
        if "float128" not in str(exc):  # pragma: no cover
            raise
        warnings.warn(
            f"{fmt} output does not allow saving metadata at maximum precision. "
            "Converting to lower precision"
        )
        return False

    return True


def _can_serialize_meta(probe_file: str, fmt: str) -> bool:
    """Check if a given file format can serialize the table metadata.

    A small table is written to ``probe_file`` with ``serialize_meta=True``. If this
    works, the probe file is deleted and True is returned. If a ``TypeError``
    mentioning ``serialize_meta`` is raised, a warning is issued (part of the data
    will probably not be saved) and False is returned. Any other ``TypeError`` is
    raised again, because it would mean there are larger problems that need to be
    solved.

    Parameters
    ----------
    probe_file : str
        The name of the file to be used for probing
    fmt : str
        The format to be used for probing, in the ``format`` argument of ``Table.write``

    Returns
    -------
    yes_it_can : bool
        Whether the format can serialize the metadata
    """
    try:
        probe_table = Table({"a": [3]})
        probe_table.write(probe_file, overwrite=True, format=fmt, serialize_meta=True)
        # The probe file is only needed for the check
        os.unlink(probe_file)
    except TypeError as exc:
        if "serialize_meta" not in str(exc):  # pragma: no cover
            raise
        warnings.warn(
            f"{fmt} output does not serialize the metadata at the moment. "
            "Some attributes will be lost."
        )
        return False

    return True
Navigation

Source code for stingray.io