1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- # HAR classification
- # Author: Burak Himmetoglu
- # 8/15/2017
- import pandas as pd
- import numpy as np
- import os
- def read_data(data_path, split = "train"):
- """ Read data """
- # Fixed params
- n_steps = 128
- # Paths
- path_ = os.path.join(data_path, split)
- path_signals = os.path.join(path_, "Inertial_Signals")
- # Read labels and one-hot encode
- label_path = os.path.join(path_, "y_" + split + ".txt")
- labels = pd.read_csv(label_path, header = None)
- # Read time-series data
- channel_files = os.listdir(path_signals)
- channel_files.sort()
- n_channels = len(channel_files)
- posix = len(split) + 5
- # Initiate array
- list_of_channels = []
- X = np.zeros((len(labels), n_steps, n_channels))
- i_ch = 0
- for fil_ch in channel_files:
- channel_name = fil_ch[:-posix]
- dat_ = pd.read_csv(os.path.join(path_signals,fil_ch), delim_whitespace = True, header = None)
- # X[:,:,i_ch] = dat_.as_matrix()
- X[:,:,i_ch] = dat_.iloc[:,:].values
- # Record names
- list_of_channels.append(channel_name)
- # Iterate
- i_ch += 1
- return X, labels[0].values, list_of_channels
- def standardize(train, test):
- """ Standardize data """
- # Standardize train and test
- X_train = (train - np.mean(train, axis=0)[None,:,:]) / np.std(train, axis=0)[None,:,:]
- X_test = (test - np.mean(test, axis=0)[None,:,:]) / np.std(test, axis=0)[None,:,:]
- return X_train, X_test
- def one_hot(labels, n_class = 8):
- """ One-hot encoding """
- expansion = np.eye(n_class)
- y = expansion[:, labels-1].T
- assert y.shape[1] == n_class, "Wrong number of labels!"
- return y
- def get_batches(X, y, batch_size = 100):
- """ Return a generator for batches """
- n_batches = len(X) // batch_size
- X, y = X[:n_batches*batch_size], y[:n_batches*batch_size]
- # Loop over batches and yield
- for b in range(0, len(X), batch_size):
- yield X[b:b+batch_size], y[b:b+batch_size]
|