utilities.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # HAR classification
  2. # Author: Burak Himmetoglu
  3. # 8/15/2017
  4. import pandas as pd
  5. import numpy as np
  6. import os
  7. def read_data(data_path, split = "train"):
  8. """ Read data """
  9. # Fixed params
  10. n_steps = 128
  11. # Paths
  12. path_ = os.path.join(data_path, split)
  13. path_signals = os.path.join(path_, "Inertial_Signals")
  14. # Read labels and one-hot encode
  15. label_path = os.path.join(path_, "y_" + split + ".txt")
  16. labels = pd.read_csv(label_path, header = None)
  17. # Read time-series data
  18. channel_files = os.listdir(path_signals)
  19. channel_files.sort()
  20. n_channels = len(channel_files)
  21. posix = len(split) + 5
  22. # Initiate array
  23. list_of_channels = []
  24. X = np.zeros((len(labels), n_steps, n_channels))
  25. i_ch = 0
  26. for fil_ch in channel_files:
  27. channel_name = fil_ch[:-posix]
  28. dat_ = pd.read_csv(os.path.join(path_signals,fil_ch), delim_whitespace = True, header = None)
  29. # X[:,:,i_ch] = dat_.as_matrix()
  30. X[:,:,i_ch] = dat_.iloc[:,:].values
  31. # Record names
  32. list_of_channels.append(channel_name)
  33. # Iterate
  34. i_ch += 1
  35. return X, labels[0].values, list_of_channels
  36. def standardize(train, test):
  37. """ Standardize data """
  38. # Standardize train and test
  39. X_train = (train - np.mean(train, axis=0)[None,:,:]) / np.std(train, axis=0)[None,:,:]
  40. X_test = (test - np.mean(test, axis=0)[None,:,:]) / np.std(test, axis=0)[None,:,:]
  41. return X_train, X_test
  42. def one_hot(labels, n_class = 8):
  43. """ One-hot encoding """
  44. expansion = np.eye(n_class)
  45. y = expansion[:, labels-1].T
  46. assert y.shape[1] == n_class, "Wrong number of labels!"
  47. return y
  48. def get_batches(X, y, batch_size = 100):
  49. """ Return a generator for batches """
  50. n_batches = len(X) // batch_size
  51. X, y = X[:n_batches*batch_size], y[:n_batches*batch_size]
  52. # Loop over batches and yield
  53. for b in range(0, len(X), batch_size):
  54. yield X[b:b+batch_size], y[b:b+batch_size]