Source code for mojo.analysis.sampleStats
"""
sampleStats.py
==============
Module with methods to process 1D data sample with some statistical methods.
"""
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from scipy import stats
from .windowedFFT import WindowedFFT
from ..plots.plotting import SetPaperSize
def init_figure():
"""
Create a figure in 'a6' format
"""
paperformat = SetPaperSize()
paperformat.landscape("a6")
figure = plt.figure(figsize=paperformat.landscape("a6"), dpi=150)
return figure
def set_figure_axis_to_scientific():
"""
Set the Value ticks to scientific
"""
ax_formatter = ticker.ScalarFormatter(useOffset=False)
ax_formatter.set_scientific(True)
axis = plt.gca()
axis.xaxis.set_major_formatter(ax_formatter)
axis.yaxis.set_major_formatter(ax_formatter)
[docs]class SampleStats:
"""
Class to analyse data with some statistical methods
"""
def __init__(self, time, value, variable_name, windowSize=0, windowType="FLAT", windowOverlap=0):
self.__time = time
self.__value = value
self.__variable_name = variable_name
self.n_samples = self.__value.size
self.d_time = self.__time[1] - self.__time[0]
self.freq = None
self.value_freq = None
self.windowSize = windowSize
self.windowType = windowType
self.windowOverlap = windowOverlap
self.bin_center = None
self.hist = None
self.mean = 0
self.min = 0
self.max = 0
self.variance = 0
self.std_deviation = 0
self.std_error = 0
self.skew = 0
self.skewtest = None
self.kurtosis = 0
self.kurtosistest = None
self.normaltest = None
if self.n_samples < 24:
raise ValueError("At least 24 samples are necessary for the analysis.")
if self.n_samples != self.__time.size:
raise RuntimeError("The array of the time and the value must have the same length.")
def __compute_stats(self):
"""
compute some statistical values
"""
self.mean = np.mean(self.__value)
self.min = stats.tmin(self.__value)
self.max = stats.tmax(self.__value, upperlimit=None)
self.variance = stats.tvar(self.__value)
self.std_deviation = stats.tstd(self.__value)
self.std_error = stats.tsem(self.__value)
self.skew = stats.skew(self.__value)
self.skewtest = stats.skewtest(self.__value)
self.kurtosis = stats.kurtosis(self.__value)
self.kurtosistest = stats.kurtosistest(self.__value)
self.normaltest = stats.normaltest(self.__value)
def __compute_hist(self):
"""
compute the histogram of the fluctuations
"""
if self.max == self.min:
raise RuntimeError("Values for min and max are equal or compute stats beforehand")
bins = np.linspace(self.min, self.max, self.n_samples // 24)
self.bin_center = np.linspace(self.min, self.max, self.n_samples // 24 - 1)
self.hist, _ = np.histogram(self.__value, bins=bins, density=True)
def __compute_fft(self):
"""
perform a FFT of the data
"""
value_fft = WindowedFFT(func_timeDomain=self.__value - self.mean, d_time=self.d_time, window_type=self.windowType, window_size=self.windowSize, window_overlap=self.windowOverlap)
self.freq = value_fft.getSingleSidedFrequencies()
self.value_freq = value_fft.getSingleSidedSpectrum()
[docs] def analyse(self):
"""
Perform all analysis
"""
print(f"Analyse {self.__variable_name}...")
self.__compute_stats()
self.__compute_hist()
self.__compute_fft()
[docs] def print_stats(self):
"""
Print the results of the stats analysis
"""
if self.max == self.min:
raise RuntimeError("Values for min and max are equal or compute stats beforehand")
print(f"Time step size: {self.d_time}")
print(f"Number of samples: {self.n_samples}")
print(f"Mean: {self.mean}")
print(f"Minimum: {self.min}")
print(f"Maximum: {self.max}")
print(f"Variance: {self.variance}")
print(f"Standard deviation: {self.std_deviation}")
print(f"Standard error: {self.std_error}")
print(f"Skewness: {self.skew}") # should 0
print(f"Skewtest: z = {self.skewtest[0]}, p = {self.skewtest[1]}")
print(f"Kurtosis: {self.kurtosis}") # should 0
print(f"Kurtosistest: z = {self.kurtosistest[0]}, p = {self.kurtosistest[1]}")
print(f"Normaltest: k2 = {self.normaltest[0]}, p = {self.normaltest[1]}")
[docs] def print_dominatingFrequencies(self, ntop=5):
"""
Print frequencies with highest amplitudes
"""
if self.value_freq is None:
raise RuntimeError("Please perform a FFT of the data first.")
amplitudes = np.absolute(self.value_freq)
sortedIndices = np.argsort(amplitudes)[::-1][:ntop]
print("\nTop {} dominating frequencies (of {} total):".format(ntop, len(amplitudes)))
print("{:6} {:15} {:15}".format("Index", "Frequency", "Amplitude"))
for i in range(ntop):
print(f"{sortedIndices[i]:6} {self.freq[sortedIndices[i]]:15} {amplitudes[sortedIndices[i]]:15}")
print()
def __plot_time_strand(self):
"""
Create and save a time strand
"""
time_figure = init_figure()
axis = time_figure.gca()
axis.plot(self.__time, self.__value)
axis.grid(True)
plt.xlabel('Time')
plt.ylabel(self.__variable_name)
set_figure_axis_to_scientific()
file_name = f"time_{self.__variable_name}.png"
time_figure.savefig(file_name, dpi=900)
time_figure.clear()
def __plot_spectrum(self):
"""
Create and save a spectrum
"""
if self.value_freq is None:
raise RuntimeError("Please perform a FFT of the data first.")
spectrum_figure = init_figure()
axis = spectrum_figure.gca()
bar_width = 0.8 * (self.freq[1] - self.freq[0])
axis.bar(self.freq - 0.5 * bar_width, np.absolute(self.value_freq), width=bar_width)
axis.grid(True)
plt.xlabel('Frequency')
plt.ylabel(f'{self.__variable_name}(f)')
file_name = f"spectrum_{self.__variable_name}.png"
spectrum_figure.savefig(file_name, dpi=900)
spectrum_figure.clear()
def __plot_hist(self):
"""
Create and save a histogram
"""
if self.hist is None:
raise RuntimeError("Please compute the histogram of the data first.")
sigma = self.std_deviation
mean = self.mean
normal_dist = 1. / (sigma * np.sqrt(2. * np.pi)) * np.exp(-np.square(self.bin_center - mean) / (2 * pow(sigma, 2)))
bin_width = 0.8 * (self.bin_center[1] - self.bin_center[0])
hist_figure = init_figure()
axis = hist_figure.gca()
axis.bar(self.bin_center - 0.5 * bin_width, self.hist, width=bin_width)
axis.plot(self.bin_center, normal_dist, linewidth=2, color='r')
axis.grid(True)
plt.xlabel('bin')
plt.ylabel(f'Histogram({self.__variable_name})')
set_figure_axis_to_scientific()
file_name = f"hist_{self.__variable_name}.png"
hist_figure.savefig(file_name, dpi=900)
hist_figure.clear()
[docs] def plot_results(self):
"""
Plot the results in a time strand, spectrum and histogram
"""
# @todo: add special treatment for Pressure e.g. use 1/3 octave bands, SPL, PSD
self.__plot_time_strand()
self.__plot_spectrum()
self.__plot_hist()