# utilities_real.py
#
# HAR classification
# Author: Burak Himmetoglu
# 8/15/2017
  4. import pandas as pd
  5. import numpy as np
  6. import os
  7. def read_data(data_path, split = "train"):
  8. """ Read data """
  9. # Fixed params
  10. n_class = 6
  11. n_steps = 128
  12. # Paths
  13. path_ = os.path.join(data_path, split)
  14. path_signals = os.path.join(path_, "Inertial_Signals")
  15. # Read labels and one-hot encode
  16. label_path = os.path.join(path_, "y_" + split + ".txt")
  17. labels = pd.read_csv(label_path, header = None)
  18. # Read time-series data
  19. channel_files = os.listdir(path_signals)
  20. channel_files.sort()
  21. n_channels = len(channel_files)
  22. posix = len(split) + 5
  23. # Initiate array
  24. list_of_channels = []
  25. X = np.zeros((len(labels), n_steps, n_channels))
  26. i_ch = 0
  27. for fil_ch in channel_files:
  28. channel_name = fil_ch[:-posix]
  29. dat_ = pd.read_csv(os.path.join(path_signals,fil_ch), delim_whitespace = True, header = None)
  30. X[:,:,i_ch] = dat_.as_matrix()
  31. # Record names
  32. list_of_channels.append(channel_name)
  33. # iterate
  34. i_ch += 1
  35. # Return
  36. return X, labels[0].values, list_of_channels
  37. def standardize(train, test):
  38. """ Standardize data """
  39. # Standardize train and test
  40. X_train = (train - np.mean(train, axis=0)[None,:,:]) / np.std(train, axis=0)[None,:,:]
  41. X_test = (test - np.mean(test, axis=0)[None,:,:]) / np.std(test, axis=0)[None,:,:]
  42. return X_train, X_test
  43. def one_hot(labels, n_class = 6):
  44. """ One-hot encoding """
  45. expansion = np.eye(n_class)
  46. y = expansion[:, labels-1].T
  47. assert y.shape[1] == n_class, "Wrong number of labels!"
  48. return y
  49. def get_batches(X, y, batch_size = 100):
  50. """ Return a generator for batches """
  51. n_batches = len(X) // batch_size
  52. X, y = X[:n_batches*batch_size], y[:n_batches*batch_size]
  53. # Loop over batches and yield
  54. for b in range(0, len(X), batch_size):
  55. yield X[b:b+batch_size], y[b:b+batch_size]