Source code for lstid_processing.model.analysis

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Full license can be found in License.md
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Analyse concatonated SAMI3 files."""

import datetime as dt
import numpy as np
import pandas as pds
from scipy import signal
from scipy import stats


[docs] def get_default_indices(time_array=None, lat_array=None): """Get the default indices for the C/NOFS and DMSP F16 field lines. Parameters ---------- time_array : array-like or NoneType Array of times, from which the conjugate time index may be selected. lat_array : array-like or NoneType Array of latitudes with dimensions of time x nl x nf x nz. Returns ------- nlind : int SAMI3 'nl' index for the C/NOFS and DMSP F16 location nfindc : int SAMI3 'nf' index for the C/NOFS location nfindc : int SAMI3 'nf' index for the DMSP F16 location nt : int or NoneType None if `time_array` is None, time index for the satelite conjuction otherwise nzindc : int None if `lat_array` or `time_array` is None; otherwise the altitude index for the C/NOFS location nzindd : int None if `lat_array` is None or `time_array` is None; otherwise altitude index for the DMSP F16 location Notes ----- C/NOFS is at: 11.3 N, 33.2 E, 479.4 km DMSP F16 is at: 11.3 N, 33.2 E, 845.8 km Conjunction at: 26 March 2014 at 02:49:59 UT """ # CINDI field line: 11.3 N, 33.2 E, 479.4 km # DMSP field line: 11.3 N, 33.2 E, 845.8 km nlind = 26 nfindc = 44 nfindd = 58 if time_array is None: nt = None nzindc = None nzindd = None else: nt = get_time_index(time_array, dt.datetime(2014, 3, 26, 2, 49, 59)) if lat_array is None: nzindc = None nzindd = None else: nzindc = abs(lat_array[nt, nlind, nfindc] - 11.3).argmin() nzindd = abs(lat_array[nt, nlind, nfindd] - 11.3).argmin() return nlind, nfindc, nfindd, nt, nzindc, nzindd
[docs] def get_time_index(time_array, dtime): """Retrieve the index corresponding to the closest time value. Parameters ---------- time_array : array-like Array of times, from which the desired time index may be selected. dtime : dt.datetime Desired time as a datetime object Returns ------- nt : int Time index """ nt = int(abs(pds.to_datetime(time_array).to_pydatetime() - dtime).argmin()) return nt
[docs] def get_f2_peaks(nlind, nfind, sami): """Get the F2 peak locations for a specified field line. Parameters ---------- nlind : int SAMI3 `nl` index for the field line nfind : int SAMI3 `nf` index for the field line sami : xr.Dataset Dataset with SAMI3 values Returns ------- f2_inds : dict Dict with keys 'south' and 'north' that contain arrays of indicies corresponding to the altitude indices for the F2 peak across all times. Note that these will only be valid if the field line reaches into the topside ionosphere. """ # Get the halfway point along the field line phalf = int(sami['glat'][0, nlind, nfind].shape[0] / 2) # Get the index of the density peak along each half of the field line f2_inds = { 'south': sami['dene'].values[:, nlind, nfind, :phalf].argmax(axis=1), 'north': phalf + sami['dene'].values[:, nlind, nfind, phalf:].argmax(axis=1)} return f2_inds
[docs] def get_dominant_acceleration(sami): """Get the dominant acceleration variation term indices. Parameters ---------- sami : xr.Dataset SAMI3 concatonated data with acceleration variation terms Returns ------- dom_acc : array-like Array of numbers indicating which acceleration term has the greatest variability; has values of 1, 2, and 3 Notes ----- 1 is the plasma pressure gradient, 2 is the ion-neutral collisions, and 3 is the ion-ion collisions """ # Get the indexes where the collisions are the highest ineu = np.where((sami['rel_u2_max'].values >= sami['rel_u1_max'].values) & (sami['rel_u2_max'].values >= sami['rel_u3_max'].values)) iion = np.where((sami['rel_u3_max'].values >= sami['rel_u1_max'].values) & (sami['rel_u3_max'].values >= sami['rel_u2_max'].values)) # Initalize the output to specify that plasma pressure is highest dom_acc = np.ones(shape=sami['rel_u3_max'].values.shape) # Reassign indices where collisions are greater dom_acc[ineu] = 2 dom_acc[iion] = 3 return dom_acc
[docs] def get_topside_peaks(sami, nzinds, dat_keys, dat_scale, peak_height): """Get the variation peaks across a range of altitudes. Parameters ---------- sami : xr.Dataset Dataset with SAMI3 values for the desired time period nzinds : list-like Lists of altitude indices at which peaks will be calculated dat_keys : list-like List of data keys to evaluate, expects relative variation keys like ['rel_dene_d'] dat_scale : list-like List of scaling parameters to divide from the data peak_height : list-like List of heights that peaks must meet in order to qualify in the detection, can be None to include all peaks regardless of significance Returns ------- min_ind : dict Dict with keys corresponding to data keys and values contianing a list of indices that locate significant peak minima max_ind : dict Dict with keys corresponding to data keys and values contianing a list of indices that locate significant peak maxima """ # Initialize the output min_ind = {dkey: list() for dkey in dat_keys} max_ind = {dkey: list() for dkey in dat_keys} # Cycle through each data type for i, dkey in enumerate(dat_keys): # Cycle through all altitudes for nzind in nzinds: # Find the maxima across the desired time period peak_ind, _ = signal.find_peaks( sami[dkey][:, nzind].values / dat_scale[i], height=peak_height[i]) max_ind[dkey].append(peak_ind) # Find the minima across the desired time period peak_ind, _ = signal.find_peaks( -1.0 * sami[dkey][:, nzind].values / dat_scale[i], height=peak_height[i]) min_ind[dkey].append(peak_ind) return min_ind, max_ind
[docs] def find_linear_breaks(sami, nlind, nfind, nzinds, peak_ind, lat_break=6.0, sec_break=900, max_lat=20.0): """Identify the breaks in peaks that indicate a change in wavefront. Parameters ---------- sami : xr.Dataset Dataset with SAMI3 values for the desired time period nlind : int 'nl' index for the relative data included in the minima and maxima data keys nfind : int 'nf' index for the relative data included in the minima and maxima data keys nzinds : list-like Lists of altitude indices at which peaks were calculated peak_ind : dict Dict with keys corresponding to data keys and values contianing a list of indices that locate significant peak minima or maxima lat_break : float Change in latitude that indicates a break (default=6.0) sec_break : int Change in time (seconds) that indicates a break (default=900) max_lat : float Maximum latitude for data, starting point for breaks (default=20.0) Returns ------- peak_lat : dict Latitudes corresponding to the peak indices with keys corresponding to data variables peak_sec : dict Seconds from the starting time corresponding to the peak indices with keys corresponding to data variables lat_break_inds : dict Indices of the latitude breaks with keys corresponding to data variables Notes ----- Only identifies North-to-South trends automatically """ # Initialize the output peak_lat = dict() peak_sec = dict() lat_break_inds = dict() # Cycle through all the data keys start = pds.to_datetime(sami['datetime'].values[0]).to_pydatetime() for dkey in peak_ind.keys(): # Initalize the output lists peak_lat[dkey] = list() peak_sec[dkey] = list() lat_break_inds[dkey] = list() # Cycle through the altitudes to gather the peak locations for i, nzind in enumerate(nzinds): # Get the latitudes and times for the peak locations if len(peak_ind[dkey][i]) > 0: lat = sami['glat'].values[ peak_ind[dkey][i], nlind, nfind, nzind] sec = [tval.total_seconds() for tval in ( pds.to_datetime(sami['datetime'].values[ peak_ind[dkey][i]]).to_pydatetime() - start)] # Save the output peak_lat[dkey].extend(list(lat)) peak_sec[dkey].extend(sec) # Find breaks in the locations. Start by sorting data by location # relative to the maximum in the North and time lat = max_lat - np.array(peak_lat[dkey]) isort = np.lexsort((lat, peak_sec[dkey])) peak_lat[dkey] = np.array(peak_lat[dkey])[isort] peak_sec[dkey] = np.array(peak_sec[dkey])[isort] # Calculate the latitude and time differences of the sorted data lat_diff = abs(peak_lat[dkey][1:] - peak_lat[dkey][:-1]) sec_diff = peak_sec[dkey][1:] - peak_sec[dkey][:-1] # Get the latitude break indices lat_break_inds[dkey] = np.where((lat_diff > lat_break) | (sec_diff > sec_break))[0] + 1 return peak_lat, peak_sec, lat_break_inds
[docs] def fit_lines_to_peaks(peak_lat, peak_sec, lat_break, min_pnts=40): """Calculate linear fits to the maxima and minima for each pass. Parameters ---------- peak_lat : dict Latitudes corresponding to the peak indices with keys corresponding to data variables peak_sec : dict Seconds from the starting time corresponding to the peak indices with keys corresponding to data variables lat_break : dict Indices of the latitude breaks with keys corresponding to data variables min_pnts : int Minimum number of points needed to calculate a linear fit (default=40) Returns ------- lin_fit : dict Output from scipy.stats.linregress for each valid fit period """ # Initialize the output lin_fit = {dkey: list() for dkey in peak_lat.keys()} # Cycle through all data variables for dkey in lin_fit.keys(): # Cycle through each latitude break for i, ibreak in enumerate(lat_break[dkey]): # Set the start index istart = 0 if i == 0 else lat_break[dkey][i - 1] # Get the range tmin = peak_sec[dkey][istart:ibreak][0] tmax = peak_sec[dkey][istart:ibreak][-1] # Fit the data for this pass if tmin < tmax and ibreak - istart > min_pnts: lin_fit[dkey].append([stats.linregress( peak_sec[dkey][istart:ibreak], peak_lat[dkey][istart:ibreak]), tmin, tmax]) # Set the start index for the last pass istart = lat_break[dkey][-1] # Get the range tmin = peak_sec[dkey][istart:][0] tmax = peak_sec[dkey][istart:][-1] # Fit the data for this pass if tmin < tmax and len(peak_sec[dkey]) - istart > min_pnts: lin_fit[dkey].append([stats.linregress( peak_sec[dkey][istart:], peak_lat[dkey][istart:]), tmin, tmax]) return lin_fit