Source code for TKEanalyst.rmspike

import numpy as np
import pandas as pd
import logging


[docs]def rmspike(vna_df, u_stats, v_stats, w_stats, w2_stats=None,
			method="velocity",
			freq=200., lambda_a=1.0, k=3.0, profile_type="lp"):
	""" Spike removal and replacement - see Nikora & Goring (1999) and Goring & Nikora (2002).

	Args:
		vna_df (pandas.DataFrame): matrix-like data array of the vna measurement file
		u_stats (pandas.DataFrame): streamwise velocity stats from flowstat function
		v_stats (pandas.DataFrame): perpendicular velocity stats from flowstat function
		w_stats (pandas.DataFrame): vertical velocity stats from flowstat function
		w2_stats (pandas.DataFrame): sec. vertical velocity stats from flowstat function (only required if profile_type is not lp)
		method (str): determines whether to use acceleration or velocity (default) for despiking
		freq (int): sampling frequency in 1/s (Hz); default is 200 Hz
		lambda_a (float): multiplier of gravitational acceleration (acceleration threshold)
		k (float): multiplier of velocity stdev (velocity threshold)
		side (str): orientation of the probe (default: DOWN, which mean probe looks like FlowTracker in a river)

	Note:
		Goring & Nikora (2002) suggest lambda_a = 1.0 ~ 1.5 and k = 1.5, but we shall use lambda_a = 1.0 and k = 3 ~ 9.
		SonTek, Nortek, and Lei recommend the SNR and correlation thresholds to be 15 and 70 respectively.
		Though data points have high SNR, the correlation can be low.
	"""

	# Acceleration a
	# Any operation with NaN will return NaN
	# Hence, only two consecutive data records (i.e. delta t = 1/f) will produce a set of acceleration components.
	# shift(1) makes the first (0) entries of a_i being np.nan (i.e. first acceleration is in 2nd row of a_i)
	print("   - calculating accelerations ...")
	a_x = (vna_df["u (m/s)"].shift(-1) - vna_df["u (m/s)"]).shift(1) * freq
	a_y = (vna_df["v (m/s)"].shift(-1) - vna_df["v (m/s)"]).shift(1) * freq
	a_z = (vna_df["w1 (m/s)"].shift(-1) - vna_df["w1 (m/s)"]).shift(1) * freq
	if not(profile_type == "lp"):
		a_z2 = (vna_df["w2 (m/s)"].shift(-1) - vna_df["w2 (m/s)"]).shift(1) * freq
		# set headers as a function of probe orientation
		hdr_spkct = ["u spikes", "v spikes", "w1 spikes", "w2 spikes"]
	else:
		# set headers as a function of probe orientation
		hdr_spkct = ["u spikes", "v spikes", "w spikes"]

	# Goring & Nikora (2002)'s Acceleration Thresholding Method (ATM) tests the acceleration thresholds first
	print("   - looking for acceleration spikes ...")
	a_cr = 9.81 * lambda_a  # acceleration threshold
	spikeu = np.nansum(np.where(~((a_x > -a_cr) & (a_x < a_cr)) & ~np.isnan(a_x), 1, 0))
	spikev = np.nansum(np.where(~((a_y > -a_cr) & (a_y < a_cr)) & ~np.isnan(a_y), 1, 0))
	spikew1 = np.nansum(np.where(~((a_z > -a_cr) & (a_z < a_cr)) & ~np.isnan(a_z), 1, 0))
	spikevct = [spikeu, spikev, spikew1]
	if not (profile_type == "lp"):
		spikew2 = np.nansum(np.where(~((a_z2 > -a_cr) & (a_z2 < a_cr)) & ~np.isnan(a_z2), 1, 0))
		spikevct.append(spikew2)

	# calculate velocity thresholds for despiking
	print("   - looking for velocity spikes ...")
	u_crl = u_stats["average"] - k * u_stats["std"]
	u_cru = u_stats["average"] + k * u_stats["std"]
	v_crl = v_stats["average"] - k * v_stats["std"]
	v_cru = v_stats["average"] + k * v_stats["std"]
	w_crl = w_stats["average"] - k * w_stats["std"]
	w_cru = w_stats["average"] + k * w_stats["std"]

	spikeu_vel = np.nansum(np.where(~((vna_df["u (m/s)"] > u_crl) & (vna_df["u (m/s)"] < u_cru)) & ~np.isnan(vna_df["u (m/s)"]), 1, 0))
	spikev_vel = np.nansum(np.where(~((vna_df["v (m/s)"] > v_crl) & (vna_df["v (m/s)"] < v_cru)) & ~np.isnan(vna_df["v (m/s)"]), 1, 0))
	spikew1_vel = np.nansum(np.where(~((vna_df["w1 (m/s)"] > w_crl) & (vna_df["w1 (m/s)"] < w_cru)) & ~np.isnan(vna_df["w1 (m/s)"]), 1, 0))
	spikevct_vel = [spikeu_vel, spikev_vel, spikew1_vel]
	if not (profile_type == "lp"):
		w2_crl = w2_stats["average"] - k * w2_stats["std"]
		w2_cru = w2_stats["average"] + k * w2_stats["std"]
		spikew2_vel = np.nansum(np.where(~((vna_df["w2 (m/s)"] > w2_crl) & (vna_df["w2 (m/s)"] < w2_cru)) & ~np.isnan(vna_df["w2 (m/s)"]), 1, 0))
		spikevct_vel.append(spikew2_vel)

	if "vel" in method:
		print("   - running select VELOCITY despiking method (provided argument: %s)" % str(method))
		vna_df["u (m/s)"] = np.where(
			~((vna_df["u (m/s)"] > u_crl) & (vna_df["u (m/s)"] < u_cru)) & ~np.isnan(vna_df["u (m/s)"]),
			np.nan, vna_df["u (m/s)"]
			)
		vna_df["v (m/s)"] = np.where(
			~((vna_df["v (m/s)"] > v_crl) & (vna_df["v (m/s)"] < v_cru)) & ~np.isnan(vna_df["v (m/s)"]),
			np.nan, vna_df["v (m/s)"]
		)
		vna_df["w1 (m/s)"] = np.where(
			~((vna_df["w1 (m/s)"] > w_crl) & (vna_df["w1 (m/s)"] < w_cru)) & ~np.isnan(vna_df["w1 (m/s)"]),
			np.nan, vna_df["w1 (m/s)"]
		)
		if not (profile_type == "lp"):
			vna_df["w2 (m/s)"] = np.where(
				~((vna_df["w2 (m/s)"] > w2_crl) & (vna_df["w2 (m/s)"] < w2_cru)) & ~np.isnan(vna_df["w2 (m/s)"]),
				np.nan, vna_df["w2 (m/s)"]
			)
	else:
		print("   - running select ACCELERATION despiking method (provided argument: %s)" % str(method))
		vna_df["u (m/s)"] = np.where(
			~((a_x > -a_cr) & (a_x < a_cr)) & ~np.isnan(a_x),
			np.nan, vna_df["u (m/s)"]
		)
		vna_df["v (m/s)"] = np.where(
			~((a_y > -a_cr) & (a_y < a_cr)) & ~np.isnan(a_y),
			np.nan, vna_df["v (m/s)"]
		)
		vna_df["w1 (m/s)"] = np.where(
			~((a_z > -a_cr) & (a_z < a_cr)) & ~np.isnan(a_z),
			np.nan, vna_df["w1 (m/s)"]
		)
		if not (profile_type == "lp"):
			vna_df["w2 (m/s)"] = np.where(
				~((a_z2 > -a_cr) & (a_z2 < a_cr)) & ~np.isnan(a_z2),
				np.nan, vna_df["w2 (m/s)"]
			)

	# create output documentation dataframe
	spike_stats = pd.DataFrame(data=[spikevct, spikevct_vel],
								columns=hdr_spkct,
								index=["acceleration", "velocity"])

	return spike_stats, vna_df