Skip to content

Subsampling

Subsampler

Subsampler class.

It subsamples the data by using a subsampling method chosen among
  • Static - subsamples data by taking one sample each step-samples
  • WSDynamic - entropy based method with dynamic window size computed by breakpoint analysis
  • WSFFTStatic - entropy based method with fixed window size computed by FFT analysis
  • WSStatic - entropy based method with predefined window size
Source code in fpcmci/preprocessing/Subsampler.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
class Subsampler():
    """
    Subsampler class.

    It subsamples the data by using a subsampling method chosen among:
        - Static - subsamples data by taking one sample each step-samples
        - WSDynamic - entropy based method with dynamic window size computed by breakpoint analysis
        - WSFFTStatic - entropy based method with fixed window size computed by FFT analysis
        - WSStatic - entropy based method with predefined window size
    """

    def __init__(self,
                 df: pd.DataFrame,
                 ss_method: SubsamplingMethod):
        """
        Subsampler class constructor

        Args:
            df (pd.DataFrame): dataframe to subsample
            ss_method (SubsamplingMethod): subsampling method
        """
        self.df = df
        self.ss_method = ss_method
        self.ss_method.initialise(df)
        # Row indexes selected by the last subsample() call; None until it has run
        self.result = None


    def subsample(self):
        """
        Runs the subsampling algorithm and returns the subsampled ndarray

        The selected row indexes are also stored in self.result so that
        plot_subsampled_data() can highlight them.

        Returns:
            (ndarray): Subsampled dataframe value
        """
        self.result = self.ss_method.run()
        return self.df.values[self.result, :]


    def plot_subsampled_data(self, dpi = 100, show = True):
        """
        Plot dataframe sub-sampled data

        One subplot per dataframe column: the full signal is drawn as a red
        line and the selected samples are circled in blue. If subsample() has
        not been called yet it is run here first, instead of failing on a
        missing self.result.

        Args:
            dpi (int, optional): image dpi. Defaults to 100.
            show (bool, optional): if True it shows the figure and blocks the process. Defaults to True.
        """
        # The plot needs the selected indexes: compute them on demand
        if self.result is None:
            self.subsample()

        n_samples, n_plot = self.df.shape
        values = self.df.values

        # Create grid
        gs = gridspec.GridSpec(n_plot, 1)

        # Time vector
        T = np.arange(n_samples)

        pl.figure(dpi = dpi)
        for i in range(n_plot):
            pl.subplot(gs[i, 0])
            pl.plot(T, values[:, i], color = 'tab:red')
            pl.scatter(T[self.result],
                       values[self.result, i],
                       s = 80,
                       facecolors = 'none',
                       edgecolors = 'b')
            pl.gca().set(ylabel = r'$' + str(self.df.columns.values[i]) + '$')
        if show:
            pl.show()

__init__(df, ss_method)

Subsampler class constructor

Parameters:

Name Type Description Default
df pd.DataFrame

dataframe to subsample

required
ss_method SubsamplingMethod

subsampling method

required
Source code in fpcmci/preprocessing/Subsampler.py
18
19
20
21
22
23
24
25
26
27
28
29
30
def __init__(self,
             df: pd.DataFrame,
             ss_method: SubsamplingMethod):
    """
    Build a Subsampler around a dataframe and a subsampling strategy

    Args:
        df (pd.DataFrame): dataframe to subsample
        ss_method (SubsamplingMethod): subsampling method; the dataframe is
            handed to it through its initialise() method
    """
    self.df, self.ss_method = df, ss_method
    # Give the strategy the data it will work on
    self.ss_method.initialise(df)

plot_subsampled_data(dpi=100, show=True)

Plot dataframe sub-sampled data

Parameters:

Name Type Description Default
dpi int

image dpi. Defaults to 100.

100
show bool

if True it shows the figure and block the process. Defaults to True.

True
Source code in fpcmci/preprocessing/Subsampler.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def plot_subsampled_data(self, dpi = 100, show = True):
    """
    Plot dataframe sub-sampled data

    One subplot per dataframe column: the full signal is drawn as a red line
    and the selected samples are circled in blue. If subsample() has not been
    called yet it is run here first, instead of failing with an
    AttributeError on the missing self.result.

    Args:
        dpi (int, optional): image dpi. Defaults to 100.
        show (bool, optional): if True it shows the figure and blocks the process. Defaults to True.
    """
    # self.result is only created by subsample(): compute it on demand
    if getattr(self, 'result', None) is None:
        self.subsample()

    n_samples, n_plot = self.df.shape
    values = self.df.values

    # Create grid
    gs = gridspec.GridSpec(n_plot, 1)

    # Time vector
    T = np.arange(n_samples)

    pl.figure(dpi = dpi)
    for i in range(n_plot):
        pl.subplot(gs[i, 0])
        pl.plot(T, values[:, i], color = 'tab:red')
        pl.scatter(T[self.result],
                   values[self.result, i],
                   s = 80,
                   facecolors = 'none',
                   edgecolors = 'b')
        pl.gca().set(ylabel = r'$' + str(self.df.columns.values[i]) + '$')
    if show:
        pl.show()

subsample()

Runs the subsampling algorithm and returns the subsampled ndarray

Returns:

Type Description
ndarray

Subsampled dataframe value

Source code in fpcmci/preprocessing/Subsampler.py
33
34
35
36
37
38
39
40
41
def subsample(self):
    """
    Execute the configured subsampling method and slice the dataframe

    The indexes returned by the method are kept in self.result.

    Returns:
        (ndarray): rows of the dataframe values selected by the method
    """
    selected = self.ss_method.run()
    self.result = selected
    data = self.df.values
    return data[selected, :]

SubsamplingMethod

Bases: ABC

SubsamplingMethod abstract class

Source code in fpcmci/preprocessing/subsampling_methods/SubsamplingMethod.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class SubsamplingMethod(ABC):
    """
    Abstract base class of every subsampling method

    A concrete method stores its mode, receives the dataframe through
    initialise() and implements run().
    """
    def __init__(self, ssmode: SSMode):
        # No dataframe is attached until initialise() is called
        self.df = None
        self.ssmode = ssmode


    def initialise(self, dataframe: pd.DataFrame):
        """
        Attach the dataframe that run() will subsample

        Args:
            dataframe (pd.DataFrame): dataframe to subsample
        """
        self.df = dataframe


    @abstractmethod
    def run(self):
        """
        Run subsampler (to be implemented by each concrete method)
        """
        return None

initialise(dataframe)

Initialise class by setting the dataframe to subsample

Parameters:

Name Type Description Default
dataframe pd.DataFrame

dataframe to subsample

required
Source code in fpcmci/preprocessing/subsampling_methods/SubsamplingMethod.py
22
23
24
25
26
27
28
29
def initialise(self, dataframe: pd.DataFrame) -> None:
    """
    Attach the dataframe that the subsampling method will work on

    Args:
        dataframe (pd.DataFrame): dataframe to subsample
    """
    # Only the reference is stored; the dataframe is not copied here
    self.df = dataframe

run() abstractmethod

Run subsampler

Source code in fpcmci/preprocessing/subsampling_methods/SubsamplingMethod.py
32
33
34
35
36
37
@abstractmethod
def run(self):
    """
    Run subsampler (abstract: concrete methods provide the implementation)
    """
    return None

EntropyBasedMethod

Bases: ABC

EntropyBasedMethod abstract class

Source code in fpcmci/preprocessing/subsampling_methods/EntropyBasedMethod.py
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
class EntropyBasedMethod(ABC):
    """
    EntropyBasedMethod abstract class

    Shared machinery of the entropy based subsampling methods: the dataset is
    split in segments (by dataset_segmentation, implemented by subclasses),
    a MovingWindow is analysed for each segment and the indexes of the
    samples kept in each window are collected.

    The methods read self.df, which this class does not set itself; it is
    expected to be provided by the concrete class.
    """
    def __init__(self, threshold):
        """
        EntropyBasedMethod class constructor

        Args:
            threshold: entropy threshold forwarded to MovingWindow.optimal_sampling
        """
        self.windows = list()
        self.segments = list()
        self.threshold = threshold


    def create_rounded_copy(self):
        """
        Create a copy of the dataframe with values rounded to one decimal

        Returns:
            (pd.DataFrame): rounded dataframe
        """
        # round() already returns a new object, so no explicit deep copy is
        # needed to leave self.df untouched
        return self.df.round(1)


    def __normalization(self):
        """
        Normalize entropy for each moving window

        Every entropy is divided by the largest one. Nothing is done when
        there are no windows or when the largest entropy is zero, since the
        division would be undefined.
        """
        if not self.windows:
            return
        max_e = max(mw.entropy for mw in self.windows)
        if max_e == 0:
            return
        for mw in self.windows:
            mw.entropy = mw.entropy / max_e


    def moving_window_analysis(self):
        """
        Compute dataframe entropy on moving windows

        One MovingWindow is built for each (start, end) pair in self.segments.
        The window list is rebuilt from scratch at every call so that running
        the method twice does not accumulate windows from the previous run.
        """
        data = self.create_rounded_copy().values

        # Start from a fresh list: stale windows would shift every index
        self.windows = list()

        for ll, rl in self.segments:
            # Build a Moving Window on the rows of this segment
            mw = MovingWindow(data[ll: rl])

            # Compute entropy
            mw.get_entropy()

            # Compute optimal number of samples
            mw.optimal_sampling(self.threshold)

            # Collect result in a list
            self.windows.append(mw)

        # Entropy normalization
        self.__normalization()


    def extract_indexes(self):
        """
        Extract a list of indexes corresponding to the samples
        selected by the subsampling procedure

        The indexes stored in each window are relative to that window; they
        are shifted by the total length (T) of the windows that precede it.

        Returns:
            (list): indexes of the selected samples
        """
        _sample_index_list = list()
        offset = 0
        for mw in self.windows:
            _sample_index_list.extend(si + offset for si in mw.opt_samples_index)
            # Running offset instead of re-summing all previous windows
            offset += mw.T
        return _sample_index_list


    @abstractmethod
    def dataset_segmentation(self):
        """
        Build self.segments (abstract: implemented by each concrete method)
        """
        pass

__normalization()

Normalize entropy for each moving window

Source code in fpcmci/preprocessing/subsampling_methods/EntropyBasedMethod.py
28
29
30
31
32
33
34
def __normalization(self):
    """
    Normalize entropy for each moving window

    Every entropy is divided by the largest one. Nothing is done when there
    are no windows or when the largest entropy is zero, since the division
    would be undefined.
    """
    if not self.windows:
        return
    max_e = max(mw.entropy for mw in self.windows)
    if max_e == 0:
        return
    for mw in self.windows:
        mw.entropy = mw.entropy / max_e

create_rounded_copy()

Create deepcopy of the dataframe but with rounded values

Returns:

Type Description
pd.DataFrame

rounded dataframe

Source code in fpcmci/preprocessing/subsampling_methods/EntropyBasedMethod.py
16
17
18
19
20
21
22
23
24
25
def create_rounded_copy(self):
    """
    Create a copy of the dataframe with values rounded to one decimal

    Returns:
        (pd.DataFrame): rounded dataframe
    """
    # round() already returns a new object, so no explicit deep copy is
    # needed to leave self.df untouched
    return self.df.round(1)

dataset_segmentation() abstractmethod

abstract method

Source code in fpcmci/preprocessing/subsampling_methods/EntropyBasedMethod.py
111
112
113
114
115
116
@abstractmethod
def dataset_segmentation(self):
    """
    Dataset segmentation hook (abstract: implemented by each concrete method)
    """
    return None

extract_indexes()

Extract a list of indexes corresponding to the samples selected by the subsampling procedure

Source code in fpcmci/preprocessing/subsampling_methods/EntropyBasedMethod.py
 98
 99
100
101
102
103
104
105
106
107
108
def extract_indexes(self):
    """
    Extract a list of indexes corresponding to the samples
    selected by the subsampling procedure

    The indexes stored in each window are relative to that window; they are
    shifted by the total length (T) of the windows that precede it.

    Returns:
        (list): indexes of the selected samples
    """
    _sample_index_list = list()
    offset = 0
    for mw in self.windows:
        _sample_index_list.extend(si + offset for si in mw.opt_samples_index)
        # Running offset instead of re-summing all previous windows
        offset += mw.T
    return _sample_index_list

moving_window_analysis()

Compute dataframe entropy on moving windows

Source code in fpcmci/preprocessing/subsampling_methods/EntropyBasedMethod.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def moving_window_analysis(self):
    """
    Compute dataframe entropy on moving windows

    One MovingWindow is built for each (start, end) pair in self.segments.
    The window list is rebuilt from scratch at every call so that running the
    method twice does not accumulate windows from the previous run.
    """
    data = self.create_rounded_copy().values

    # Start from a fresh list: stale windows would shift every index
    self.windows = list()

    for ll, rl in self.segments:
        # Build a Moving Window on the rows of this segment
        mw = MovingWindow(data[ll: rl])

        # Compute entropy
        mw.get_entropy()

        # Compute optimal number of samples
        mw.optimal_sampling(self.threshold)

        # Collect result in a list
        self.windows.append(mw)

    # Entropy normalization
    self.__normalization()

WSDynamic

Bases: SubsamplingMethod, EntropyBasedMethod

Subsampling method with dynamic window size based on entropy analysis

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class WSDynamic(SubsamplingMethod, EntropyBasedMethod):
    """
    Entropy based subsampling method whose windows have a dynamic size,
    obtained by breakpoint analysis
    """
    def __init__(self, window_min_size, entropy_threshold):
        """
        Build a WSDynamic subsampling method

        Args:
            window_min_size (int): minimum window size
            entropy_threshold (float): entropy threshold

        Raises:
            ValueError: if window_min_size is None
        """
        SubsamplingMethod.__init__(self, SSMode.WSDynamic)
        EntropyBasedMethod.__init__(self, entropy_threshold)
        if window_min_size is None:
            raise ValueError("window_type = DYNAMIC but window_min_size not specified")
        self.ws = None
        self.wms = window_min_size

    def dataset_segmentation(self):
        """
        Split the dataset in segments delimited by the detected breakpoints,
        each at least window_min_size long
        """
        rounded = self.create_rounded_copy()
        detector = rpt.Pelt(model = "l2", min_size = self.wms)
        breakpoints = detector.fit(rounded).predict(pen = 10)
        # First segment starts at row 0; the others join consecutive breakpoints
        head = (0, breakpoints[0])
        tail = [(lo, hi) for lo, hi in zip(breakpoints, breakpoints[1:])]
        self.segments = [head] + tail


    def run(self):
        """
        Run subsampler

        Returns:
            (list[int]): indexes of the remaining samples
        """
        # 1) segments, 2) entropy analysis on each segment, 3) selected indexes
        self.dataset_segmentation()
        self.moving_window_analysis()
        return self.extract_indexes()

__init__(window_min_size, entropy_threshold)

WSDynamic class constructor

Parameters:

Name Type Description Default
window_min_size int

minimum window size

required
entropy_threshold float

entropy threshold

required

Raises:

Type Description
ValueError

if window_min_size == None

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def __init__(self, window_min_size, entropy_threshold):
    """
    Build a WSDynamic subsampling method

    Args:
        window_min_size (int): minimum window size
        entropy_threshold (float): entropy threshold

    Raises:
        ValueError: if window_min_size is None
    """
    # Initialise both parents explicitly, each with its own argument
    SubsamplingMethod.__init__(self, SSMode.WSDynamic)
    EntropyBasedMethod.__init__(self, entropy_threshold)
    if window_min_size is None:
        raise ValueError("window_type = DYNAMIC but window_min_size not specified")
    self.ws = None
    self.wms = window_min_size

dataset_segmentation()

Segments dataset based on breakpoint analysis and a min window size

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
28
29
30
31
32
33
34
35
36
def dataset_segmentation(self):
    """
    Split the dataset in segments delimited by the detected breakpoints,
    each at least window_min_size long
    """
    rounded = self.create_rounded_copy()
    detector = rpt.Pelt(model = "l2", min_size = self.wms)
    breakpoints = detector.fit(rounded).predict(pen = 10)
    # First segment starts at row 0; the others join consecutive breakpoints
    head = (0, breakpoints[0])
    tail = [(lo, hi) for lo, hi in zip(breakpoints, breakpoints[1:])]
    self.segments = [head] + tail

run()

Run subsampler

Returns:

Type Description
list[int]

indexes of the remaining samples

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def run(self):
    """
    Run subsampler

    Returns:
        (list[int]): indexes of the remaining samples
    """
    # 1) segments, 2) entropy analysis on each segment, 3) selected indexes
    self.dataset_segmentation()
    self.moving_window_analysis()
    return self.extract_indexes()

WSDynamic

Bases: SubsamplingMethod, EntropyBasedMethod

Subsampling method with dynamic window size based on entropy analysis

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class WSDynamic(SubsamplingMethod, EntropyBasedMethod):
    """
    Entropy based subsampling method whose windows have a dynamic size,
    obtained by breakpoint analysis
    """
    def __init__(self, window_min_size, entropy_threshold):
        """
        Build a WSDynamic subsampling method

        Args:
            window_min_size (int): minimum window size
            entropy_threshold (float): entropy threshold

        Raises:
            ValueError: if window_min_size is None
        """
        SubsamplingMethod.__init__(self, SSMode.WSDynamic)
        EntropyBasedMethod.__init__(self, entropy_threshold)
        if window_min_size is None:
            raise ValueError("window_type = DYNAMIC but window_min_size not specified")
        self.ws = None
        self.wms = window_min_size

    def dataset_segmentation(self):
        """
        Split the dataset in segments delimited by the detected breakpoints,
        each at least window_min_size long
        """
        rounded = self.create_rounded_copy()
        detector = rpt.Pelt(model = "l2", min_size = self.wms)
        breakpoints = detector.fit(rounded).predict(pen = 10)
        # First segment starts at row 0; the others join consecutive breakpoints
        head = (0, breakpoints[0])
        tail = [(lo, hi) for lo, hi in zip(breakpoints, breakpoints[1:])]
        self.segments = [head] + tail


    def run(self):
        """
        Run subsampler

        Returns:
            (list[int]): indexes of the remaining samples
        """
        # 1) segments, 2) entropy analysis on each segment, 3) selected indexes
        self.dataset_segmentation()
        self.moving_window_analysis()
        return self.extract_indexes()

__init__(window_min_size, entropy_threshold)

WSDynamic class constructor

Parameters:

Name Type Description Default
window_min_size int

minimum window size

required
entropy_threshold float

entropy threshold

required

Raises:

Type Description
ValueError

if window_min_size == None

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def __init__(self, window_min_size, entropy_threshold):
    """
    Build a WSDynamic subsampling method

    Args:
        window_min_size (int): minimum window size
        entropy_threshold (float): entropy threshold

    Raises:
        ValueError: if window_min_size is None
    """
    # Initialise both parents explicitly, each with its own argument
    SubsamplingMethod.__init__(self, SSMode.WSDynamic)
    EntropyBasedMethod.__init__(self, entropy_threshold)
    if window_min_size is None:
        raise ValueError("window_type = DYNAMIC but window_min_size not specified")
    self.ws = None
    self.wms = window_min_size

dataset_segmentation()

Segments dataset based on breakpoint analysis and a min window size

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
28
29
30
31
32
33
34
35
36
def dataset_segmentation(self):
    """
    Split the dataset in segments delimited by the detected breakpoints,
    each at least window_min_size long
    """
    rounded = self.create_rounded_copy()
    detector = rpt.Pelt(model = "l2", min_size = self.wms)
    breakpoints = detector.fit(rounded).predict(pen = 10)
    # First segment starts at row 0; the others join consecutive breakpoints
    head = (0, breakpoints[0])
    tail = [(lo, hi) for lo, hi in zip(breakpoints, breakpoints[1:])]
    self.segments = [head] + tail

run()

Run subsampler

Returns:

Type Description
list[int]

indexes of the remaining samples

Source code in fpcmci/preprocessing/subsampling_methods/WSDynamic.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def run(self):
    """
    Run subsampler

    Returns:
        (list[int]): indexes of the remaining samples
    """
    # 1) segments, 2) entropy analysis on each segment, 3) selected indexes
    self.dataset_segmentation()
    self.moving_window_analysis()
    return self.extract_indexes()

Static

Bases: SubsamplingMethod

Subsamples data by taking one sample each step-samples

Source code in fpcmci/preprocessing/subsampling_methods/Static.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class Static(SubsamplingMethod):
    """
    Subsamples data by taking one sample each step-samples
    """
    def __init__(self, step):
        """
        Static class constructor

        Args:
            step (int): integer subsampling step, must be >= 1

        Raises:
            ValueError: if step is None or not positive
        """
        super().__init__(SSMode.Static)
        if step is None:
            raise ValueError("step not specified")
        # A zero step makes range() fail later in run(); a negative one
        # silently selects nothing. Reject both here, where the cause is clear.
        if step < 1:
            raise ValueError("step must be a positive integer")
        self.step = step

    def run(self):
        """
        Run subsampler

        Returns:
            (range): indexes of the remaining samples, i.e. 0, step, 2 * step, ...
        """
        return range(0, len(self.df.values), self.step)

__init__(step)

Static class constructor

Parameters:

Name Type Description Default
step int

integer subsampling step

required

Raises:

Type Description
ValueError

if step == None

Source code in fpcmci/preprocessing/subsampling_methods/Static.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
def __init__(self, step):
    """
    Static class constructor

    Args:
        step (int): integer subsampling step, must be >= 1

    Raises:
        ValueError: if step is None or not positive
    """
    super().__init__(SSMode.Static)
    if step is None:
        raise ValueError("step not specified")
    # A zero step makes range() fail later in run(); a negative one silently
    # selects nothing. Reject both here, where the cause is clear.
    if step < 1:
        raise ValueError("step must be a positive integer")
    self.step = step

WSFFTStatic

Bases: SubsamplingMethod, EntropyBasedMethod

Subsampling method with static window size based on Fourier analysis

Source code in fpcmci/preprocessing/subsampling_methods/WSFFTStatic.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class WSFFTStatic(SubsamplingMethod, EntropyBasedMethod):
    """
    Subsampling method with static window size based on Fourier analysis
    """
    def __init__(self, sampling_time, entropy_threshold):
        """
        WSFFTStatic class constructor

        Args:
            sampling_time (float): timeseries sampling time
            entropy_threshold (float): entropy threshold
        """
        SubsamplingMethod.__init__(self, SSMode.WSFFTStatic)
        EntropyBasedMethod.__init__(self, entropy_threshold)
        self.sampling_time = sampling_time


    def __fourier_window(self):
        """
        Compute window size based on Fourier analysis performed on dataframe

        For each column the highest peak of the amplitude spectrum is located
        and converted to half of its period, expressed in samples. The
        smallest value among the columns is returned. Columns whose spectrum
        has no peak (e.g. constant signals) are skipped.

        Returns:
            (int): window size

        Raises:
            ValueError: if no column has a peak in its spectrum
        """
        N, dim = self.df.shape
        values = self.df.values
        xf = rfftfreq(N, self.sampling_time)
        w_array = list()
        for i in range(dim):
            yf = np.abs(rfft(values[:, i]))

            peak_indices, _ = scipy.signal.find_peaks(yf)
            if len(peak_indices) == 0:
                # argmax of an empty selection would raise an unclear error
                continue
            highest_peak_index = peak_indices[np.argmax(yf[peak_indices])]
            w_array.append(ceil(1 / (2 * xf[highest_peak_index]) / self.sampling_time))
        if not w_array:
            raise ValueError("no spectral peak found: cannot define the window size")
        return min(w_array)


    def dataset_segmentation(self):
        """
        Segments dataset with a fixed window size

        Consecutive windows of self.ws rows are created; the last one is cut
        at the end of the dataset. An empty dataset gives no segments.
        """
        n_samples = len(self.df.values)
        self.segments = [(start, min(start + self.ws, n_samples))
                         for start in range(0, n_samples, self.ws)]


    def run(self):
        """
        Run subsampler

        Returns:
            (list[int]): indexes of the remaining samples
        """
        # define window size
        self.ws = self.__fourier_window()

        # build list of segment
        self.dataset_segmentation()

        # compute entropy moving window
        self.moving_window_analysis()

        # extracting subsampling procedure results
        return self.extract_indexes()

__fourier_window()

Compute window size based on Fourier analysis performed on dataframe

Returns:

Type Description
int

window size

Source code in fpcmci/preprocessing/subsampling_methods/WSFFTStatic.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def __fourier_window(self):
    """
    Compute window size based on Fourier analysis performed on dataframe

    For each column the highest peak of the amplitude spectrum is located and
    converted to half of its period, expressed in samples. The smallest value
    among the columns is returned. Columns whose spectrum has no peak (e.g.
    constant signals) are skipped.

    Returns:
        (int): window size

    Raises:
        ValueError: if no column has a peak in its spectrum
    """
    N, dim = self.df.shape
    values = self.df.values
    xf = rfftfreq(N, self.sampling_time)
    w_array = list()
    for i in range(dim):
        yf = np.abs(rfft(values[:, i]))

        peak_indices, _ = scipy.signal.find_peaks(yf)
        if len(peak_indices) == 0:
            # argmax of an empty selection would raise an unclear error
            continue
        highest_peak_index = peak_indices[np.argmax(yf[peak_indices])]
        w_array.append(ceil(1 / (2 * xf[highest_peak_index]) / self.sampling_time))
    if not w_array:
        raise ValueError("no spectral peak found: cannot define the window size")
    return min(w_array)

__init__(sampling_time, entropy_threshold)

WSFFTStatic class constructor

Parameters:

Name Type Description Default
sampling_time float

timeseries sampling time

required
entropy_threshold float

entropy threshold

required
Source code in fpcmci/preprocessing/subsampling_methods/WSFFTStatic.py
14
15
16
17
18
19
20
21
22
23
24
def __init__(self, sampling_time, entropy_threshold):
    """
    Build a WSFFTStatic subsampling method

    Args:
        sampling_time (float): timeseries sampling time
        entropy_threshold (float): entropy threshold
    """
    # Initialise both parents explicitly, each with its own argument
    for parent, argument in ((SubsamplingMethod, SSMode.WSFFTStatic),
                             (EntropyBasedMethod, entropy_threshold)):
        parent.__init__(self, argument)
    self.sampling_time = sampling_time

dataset_segmentation()

Segments dataset with a fixed window size

Source code in fpcmci/preprocessing/subsampling_methods/WSFFTStatic.py
50
51
52
53
54
55
56
57
58
def dataset_segmentation(self):
    """
    Segments dataset with a fixed window size

    Consecutive windows of self.ws rows are created; the last one is cut at
    the end of the dataset. An empty dataset gives no segments.
    """
    n_samples = len(self.df.values)
    self.segments = [(start, min(start + self.ws, n_samples))
                     for start in range(0, n_samples, self.ws)]

run()

Run subsampler

Returns:

Type Description
list[int]

indexes of the remaining samples

Source code in fpcmci/preprocessing/subsampling_methods/WSFFTStatic.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def run(self):
    """
    Run subsampler

    Returns:
        (list[int]): indexes of the remaining samples
    """
    # The window size comes from the Fourier analysis of the dataframe
    self.ws = self.__fourier_window()

    # 1) segments, 2) entropy analysis on each segment, 3) selected indexes
    self.dataset_segmentation()
    self.moving_window_analysis()
    return self.extract_indexes()

WSStatic

Bases: SubsamplingMethod, EntropyBasedMethod

Entropy based subsampling method with static window size

Source code in fpcmci/preprocessing/subsampling_methods/WSStatic.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class WSStatic(SubsamplingMethod, EntropyBasedMethod):
    """
    Entropy based subsampling method with static window size
    """
    def __init__(self, window_size, entropy_threshold):
        """
        WSStatic class constructor

        Args:
            window_size (int): window size
            entropy_threshold (float): entropy threshold

        Raises:
            ValueError: if window_size is None
        """
        # Was SSMode.WSDynamic, which looks like a copy-paste slip.
        # NOTE(review): assumes SSMode defines WSStatic like the other modes - confirm
        SubsamplingMethod.__init__(self, SSMode.WSStatic)
        EntropyBasedMethod.__init__(self, entropy_threshold)
        if window_size is None:
            raise ValueError("window_type = STATIC but window_size not specified")
        self.ws = window_size


    def dataset_segmentation(self):
        """
        Segments dataset with a fixed window size

        Consecutive windows of self.ws rows are created; the last one is cut
        at the end of the dataset. An empty dataset gives no segments.
        """
        n_samples = len(self.df.values)
        self.segments = [(start, min(start + self.ws, n_samples))
                         for start in range(0, n_samples, self.ws)]


    def run(self):
        """
        Run subsampler

        Returns:
            (list[int]): indexes of the remaining samples
        """
        # build list of segment
        self.dataset_segmentation()

        # compute entropy moving window
        self.moving_window_analysis()

        # extracting subsampling procedure results
        return self.extract_indexes()

__init__(window_size, entropy_threshold)

WSStatic class constructor

Parameters:

Name Type Description Default
window_size int

window size

required
entropy_threshold float

entropy threshold

required

Raises:

Type Description
ValueError

if window_size == None

Source code in fpcmci/preprocessing/subsampling_methods/WSStatic.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def __init__(self, window_size, entropy_threshold):
    """
    WSStatic class constructor

    Args:
        window_size (int): window size
        entropy_threshold (float): entropy threshold

    Raises:
        ValueError: if window_size is None
    """
    # Was SSMode.WSDynamic, which looks like a copy-paste slip.
    # NOTE(review): assumes SSMode defines WSStatic like the other modes - confirm
    SubsamplingMethod.__init__(self, SSMode.WSStatic)
    EntropyBasedMethod.__init__(self, entropy_threshold)
    if window_size is None:
        raise ValueError("window_type = STATIC but window_size not specified")
    self.ws = window_size

dataset_segmentation()

Segments dataset with a fixed window size

Source code in fpcmci/preprocessing/subsampling_methods/WSStatic.py
28
29
30
31
32
33
34
35
36
def dataset_segmentation(self):
    """
    Segments dataset with a fixed window size

    Consecutive windows of self.ws rows are created; the last one is cut at
    the end of the dataset. An empty dataset gives no segments.
    """
    n_samples = len(self.df.values)
    self.segments = [(start, min(start + self.ws, n_samples))
                     for start in range(0, n_samples, self.ws)]

run()

Run subsampler

Returns:

Type Description
list[int]

indexes of the remaining samples

Source code in fpcmci/preprocessing/subsampling_methods/WSStatic.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def run(self):
    """
    Run subsampler

    Returns:
        (list[int]): indexes of the remaining samples
    """
    # 1) segments, 2) entropy analysis on each segment, 3) selected indexes
    self.dataset_segmentation()
    self.moving_window_analysis()
    return self.extract_indexes()