'''load_EEG takes the eeg_full dataset downloaded from
https://archive.ics.uci.edu/ml/machine-learning-databases/eeg-mld/
and loads the data, performs preprocessing, and prepares data
for use with the ml models in mlModels.py'''
#bc ┌( ಠ_ಠ)┘@thirdBrainPrograms
import logging
import mne
import numpy as np
import os
import pandas as pd
import tarfile
from mne.time_frequency import psd_welch
from scipy.integrate import simps
#logging setup for the whole script
#the root logger lets everything from DEBUG upward through; each handler
#then applies its own threshold:
#  eeg.log  --> DEBUG and above (file is overwritten on every run)
#  terminal --> INFO and above
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

#build both destinations first ...
logFile = logging.FileHandler('eeg.log', mode='w')
stream = logging.StreamHandler()

#... give each one its threshold ...
logFile.setLevel(logging.DEBUG)
stream.setLevel(logging.INFO)

#... and attach them to the root logger (file first, then terminal)
logger.addHandler(logFile)
logger.addHandler(stream)
def _extract_archive(archive_path, dest_dir):
    '''Unpack one subject archive into dest_dir, closing it even on error.'''
    with tarfile.open(archive_path, "r") as tar:
        if hasattr(tarfile, "data_filter"):
            #the archives are downloaded files: where the running Python
            #offers extraction filters, use the "data" filter so members
            #cannot be written outside dest_dir
            tar.extractall(path=dest_dir, filter="data")
        else:
            tar.extractall(path=dest_dir)


def _load_trial(trial_path, channels, sfreq):
    '''Read one trial file and wrap the selected channels in an mne RawArray.'''
    data = np.genfromtxt(trial_path, dtype=None, encoding=None)
    df_raw = pd.DataFrame(data)
    #the code relies on column f1 holding the channel label and column f3
    #holding the sampled value of each record
    samples = [list(df_raw.loc[df_raw['f1'] == channel]['f3'])
               for channel in channels]
    #store channel and frequency info as mne object
    info = mne.create_info(channels, sfreq)
    return mne.io.RawArray(samples, info, verbose=False)


def _band_powers(raw, channels, band_dict):
    '''Return {"<channel> <band>": absolute power} for a single trial.'''
    #NOTE(review): psd_welch / simps are the names this file imports; newer
    #MNE / SciPy releases appear to expose them as Raw.compute_psd and
    #scipy.integrate.simpson -- confirm against the installed versions
    powers = {}
    for channel in channels:
        #Welch PSD restricted to the 0.1 - 40 Hz range used by the bands
        spectra, freqs = psd_welch(raw, fmin=0.1, fmax=40,
                                   n_fft=256, picks=channel, verbose=False)
        #spacing between frequency bins; identical for every band
        freq_res = freqs[1] - freqs[0]
        for band, (low, high) in band_dict.items():
            #bins inside the half-open interval (low, high]
            idx_band = np.logical_and(freqs > low, freqs <= high)
            #integrate the PSD over the band --> absolute power
            powers[channel + " " + band] = simps(spectra[0][idx_band],
                                                 dx=freq_res)
    return powers


def importTarTransform(data_dir="."):
    '''Extract the EEG subject archives and write per-trial band powers.

    Every archive <subject>.tar.gz found in data_dir is unpacked into
    eeg_folder (created in the current working directory when missing).
    Each trial file <subject>.rd.NNN.gz is loaded, a Welch power spectral
    density is computed for each channel of interest and integrated over
    each frequency band to get the absolute power. One row per trial is
    written to EEG_powers.csv as an intermediary step.

    Subject numbers with no archive and trial numbers with no file are
    skipped silently. Any other failure skips that archive / trial and is
    recorded at DEBUG level together with its traceback.

    Args:
        data_dir: directory that holds the subject archives. Defaults to
            the current working directory.

    Returns:
        None. The result is the EEG_powers.csv file.
    '''
    #pick out the channels of interest --> modify if needed (12 channels)
    channels = ['FP1', 'FP2', 'F3', 'F4', 'F7', 'F8',
                'C3', 'C4', 'P3', 'P4', 'O1', 'O2']
    #frequency band cutoff values --> use 0.1HP - 40LP (5 bands)
    band_dict = {"delta": [0.1, 4], "theta": [4, 7], "alpha": [7, 12],
                 "beta": [12, 30], "gamma": [30, 40]}
    #set sampling rate for the frequencies
    sfreq = 256
    #folder that receives the unpacked archives
    extract_dir = 'eeg_folder'
    #column headers: subject first, then one column per channel/band pair
    index = ["subject"] + [channel + " " + band
                           for channel in channels
                           for band in band_dict]
    #check to see if data folder exists --> if not make one
    if not os.path.exists(extract_dir):
        os.makedirs(extract_dir)
        logging.debug('eeg_folder created')
    #rows are collected here and turned into a dataframe once at the end
    #(growing a dataframe inside the loop copies it on every trial)
    rows = []
    row_ids = []
    k = 0
    logging.debug("Loaded:")
    #root prefix for subject files in dataset
    for root in ['co2a0000', 'co2c0000', 'co2c1000', 'co3a0000', 'co3c0000']:
        #range of subject ID numbers in the dataset
        for i in range(337, 463):
            #put them together to make the identifier file
            subject = root + str(i)
            archive = os.path.join(data_dir, subject + ".tar.gz")
            try:
                _extract_archive(archive, extract_dir)
            except FileNotFoundError:
                #some subject numbers won't be present. skip over missing
                #numbers
                continue
            except (OSError, tarfile.TarError, EOFError):
                #unreadable / damaged archive: skip it but leave a trace
                logging.debug("Could not extract %s", archive, exc_info=True)
                continue
            subject_dir = os.path.join(extract_dir, subject)
            #trial files are numbered 000 - 120
            for j in range(121):
                trial_path = os.path.join(
                    subject_dir, subject + ".rd.{:03d}.gz".format(j))
                try:
                    raw = _load_trial(trial_path, channels, sfreq)
                    #the row number is claimed as soon as the trial has
                    #loaded, so a trial that fails during the spectral step
                    #leaves a gap in the numbering
                    row_id = k
                    k += 1
                    row = {"subject": subject}
                    row.update(_band_powers(raw, channels, band_dict))
                except OSError:
                    #this trial number is not present for the subject
                    continue
                except Exception:
                    #empty / malformed file etc. --> skip the trial but
                    #keep the reason in the log file
                    logging.debug("Skipping %s", trial_path, exc_info=True)
                    continue
                rows.append(row)
                row_ids.append(row_id)
            #record the loaded subject in the log
            logging.debug(subject)
    #write the dataframe with the absolute powers
    #to csv for further manipulation - intermediary step
    df_abs_power = pd.DataFrame(rows, index=row_ids, columns=index)
    df_abs_power.to_csv('EEG_powers.csv', index=True)
    logging.info('EEG_powers.csv successfully created from loaded data.')
    logging.info('Fourier Transform applied to loaded data.')
def main():
    '''Script entry point: run the archive import / band-power export.'''
    #the whole pipeline lives in importTarTransform
    importTarTransform()


#only kick off the import when executed as a script, not when imported
if __name__ == "__main__":
    main()