Source code for mojo.analysis.sampleStats

"""
sampleStats.py
==============

Module with methods to process 1D data sample with some statistical methods.
"""

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from scipy import stats

from .windowedFFT import WindowedFFT
from ..plots.plotting import SetPaperSize


def init_figure():
    """
    Create a figure in 'a6' format
    """
    paperformat = SetPaperSize()
    paperformat.landscape("a6")
    figure = plt.figure(figsize=paperformat.landscape("a6"), dpi=150)

    return figure


def set_figure_axis_to_scientific():
    """
    Set the Value ticks to scientific
    """
    ax_formatter = ticker.ScalarFormatter(useOffset=False)
    ax_formatter.set_scientific(True)
    axis = plt.gca()
    axis.xaxis.set_major_formatter(ax_formatter)
    axis.yaxis.set_major_formatter(ax_formatter)


[docs]class SampleStats:
    """
    Class to analyse data with some statistical methods
    """

    def __init__(self, time, value, variable_name, windowSize=0, windowType="FLAT", windowOverlap=0):

        self.__time = time
        self.__value = value
        self.__variable_name = variable_name
        self.n_samples = self.__value.size
        self.d_time = self.__time[1] - self.__time[0]

        self.freq = None
        self.value_freq = None
        self.windowSize = windowSize
        self.windowType = windowType
        self.windowOverlap = windowOverlap

        self.bin_center = None
        self.hist = None

        self.mean = 0
        self.min = 0
        self.max = 0
        self.variance = 0
        self.std_deviation = 0
        self.std_error = 0
        self.skew = 0
        self.skewtest = None
        self.kurtosis = 0
        self.kurtosistest = None
        self.normaltest = None

        if self.n_samples < 24:
            raise ValueError("At least 24 samples are necessary for the analysis.")

        if self.n_samples != self.__time.size:
            raise RuntimeError("The array of the time and the value must have the same length.")

    def __compute_stats(self):
        """
        compute some statistical values
        """
        self.mean = np.mean(self.__value)
        self.min = stats.tmin(self.__value)
        self.max = stats.tmax(self.__value, upperlimit=None)
        self.variance = stats.tvar(self.__value)
        self.std_deviation = stats.tstd(self.__value)
        self.std_error = stats.tsem(self.__value)
        self.skew = stats.skew(self.__value)
        self.skewtest = stats.skewtest(self.__value)
        self.kurtosis = stats.kurtosis(self.__value)
        self.kurtosistest = stats.kurtosistest(self.__value)
        self.normaltest = stats.normaltest(self.__value)

    def __compute_hist(self):
        """
        compute the histogram of the fluctuations
        """
        if self.max == self.min:
            raise RuntimeError("Values for min and max are equal or compute stats beforehand")

        bins = np.linspace(self.min, self.max, self.n_samples // 24)

        self.bin_center = np.linspace(self.min, self.max, self.n_samples // 24 - 1)
        self.hist, _ = np.histogram(self.__value, bins=bins, density=True)

    def __compute_fft(self):
        """
        perform a FFT of the data
        """
        value_fft = WindowedFFT(func_timeDomain=self.__value - self.mean, d_time=self.d_time, window_type=self.windowType, window_size=self.windowSize, window_overlap=self.windowOverlap)
        self.freq = value_fft.getSingleSidedFrequencies()
        self.value_freq = value_fft.getSingleSidedSpectrum()

[docs]    def analyse(self):
        """
        Perform all analysis
        """
        print(f"Analyse {self.__variable_name}...")

        self.__compute_stats()
        self.__compute_hist()
        self.__compute_fft()

[docs]    def print_stats(self):
        """
        Print the results of the stats analysis
        """
        if self.max == self.min:
            raise RuntimeError("Values for min and max are equal or compute stats beforehand")

        print(f"Time step size: {self.d_time}")
        print(f"Number of samples: {self.n_samples}")
        print(f"Mean: {self.mean}")
        print(f"Minimum: {self.min}")
        print(f"Maximum: {self.max}")
        print(f"Variance: {self.variance}")
        print(f"Standard deviation: {self.std_deviation}")
        print(f"Standard error: {self.std_error}")
        print(f"Skewness: {self.skew}")  # should 0
        print(f"Skewtest: z = {self.skewtest[0]}, p = {self.skewtest[1]}")
        print(f"Kurtosis: {self.kurtosis}")  # should 0
        print(f"Kurtosistest: z = {self.kurtosistest[0]}, p = {self.kurtosistest[1]}")
        print(f"Normaltest: k2 = {self.normaltest[0]}, p = {self.normaltest[1]}")

[docs]    def print_dominatingFrequencies(self, ntop=5):
        """
        Print frequencies with highest amplitudes
        """
        if self.value_freq is None:
            raise RuntimeError("Please perform a FFT of the data first.")

        amplitudes = np.absolute(self.value_freq)
        sortedIndices = np.argsort(amplitudes)[::-1][:ntop]

        print("\nTop {} dominating frequencies (of {} total):".format(ntop, len(amplitudes)))
        print("{:6} {:15} {:15}".format("Index", "Frequency", "Amplitude"))

        for i in range(ntop):
            print(f"{sortedIndices[i]:6} {self.freq[sortedIndices[i]]:15} {amplitudes[sortedIndices[i]]:15}")
        print()

    def __plot_time_strand(self):
        """
        Create and save a time strand
        """
        time_figure = init_figure()
        axis = time_figure.gca()
        axis.plot(self.__time, self.__value)
        axis.grid(True)
        plt.xlabel('Time')
        plt.ylabel(self.__variable_name)
        set_figure_axis_to_scientific()
        file_name = f"time_{self.__variable_name}.png"
        time_figure.savefig(file_name, dpi=900)
        time_figure.clear()

    def __plot_spectrum(self):
        """
        Create and save a spectrum
        """
        if self.value_freq is None:
            raise RuntimeError("Please perform a FFT of the data first.")

        spectrum_figure = init_figure()
        axis = spectrum_figure.gca()
        bar_width = 0.8 * (self.freq[1] - self.freq[0])
        axis.bar(self.freq - 0.5 * bar_width, np.absolute(self.value_freq), width=bar_width)
        axis.grid(True)
        plt.xlabel('Frequency')
        plt.ylabel(f'{self.__variable_name}(f)')
        file_name = f"spectrum_{self.__variable_name}.png"
        spectrum_figure.savefig(file_name, dpi=900)
        spectrum_figure.clear()

    def __plot_hist(self):
        """
        Create and save a histogram
        """
        if self.hist is None:
            raise RuntimeError("Please compute the histogram of the data first.")

        sigma = self.std_deviation
        mean = self.mean
        normal_dist = 1. / (sigma * np.sqrt(2. * np.pi)) * np.exp(-np.square(self.bin_center - mean) / (2 * pow(sigma, 2)))
        bin_width = 0.8 * (self.bin_center[1] - self.bin_center[0])

        hist_figure = init_figure()
        axis = hist_figure.gca()
        axis.bar(self.bin_center - 0.5 * bin_width, self.hist, width=bin_width)
        axis.plot(self.bin_center, normal_dist, linewidth=2, color='r')
        axis.grid(True)
        plt.xlabel('bin')
        plt.ylabel(f'Histogram({self.__variable_name})')
        set_figure_axis_to_scientific()
        file_name = f"hist_{self.__variable_name}.png"
        hist_figure.savefig(file_name, dpi=900)
        hist_figure.clear()

[docs]    def plot_results(self):
        """
        Plot the results in a time strand, spectrum and histogram
        """
        # @todo: add special treatment for Pressure e.g. use 1/3 octave bands, SPL, PSD
        self.__plot_time_strand()
        self.__plot_spectrum()
        self.__plot_hist()