# datamatrix/py3compat.py
# -*- coding: utf-8 -*-
"""
This file is part of OpenSesame.
OpenSesame is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OpenSesame is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OpenSesame. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
if sys.version_info >= (3,0,0):
py3 = True
basestring = str
universal_newline_mode = u'r'
else:
bytes = str
str = unicode
py3 = False
universal_newline_mode = u'rU'
def safe_decode(s, enc='utf-8', errors='strict'):
if isinstance(s, str):
return s
if isinstance(s, bytes):
return s.decode(enc, errors)
# Numeric values are converted to strings right away
try:
assert(int(s) == float(s))
return str(int(s))
except:
try:
return str(float(s))
except:
pass
# Some types need to be converted to unicode, but require the encoding
# and errors parameters. Notable examples are Exceptions, which have
# strange characters under some locales, such as French. It even appears
# that, at least in some cases, they have to be encoded to str first.
# Presumably, there is a better way to do this, but for now this at
# least gives sensible results.
try:
return safe_decode(bytes(s), enc=enc, errors=errors)
except:
pass
# For other types, the unicode representation doesn't require a specific
# encoding. This mostly applies to non-stringy things, such as integers.
return str(s)
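# A usage sketch for safe_decode() and safe_encode(); the example values are
# illustrative:
#
#   safe_decode(b'caf\xc3\xa9')  # -> u'café'
#   safe_decode(3.0)             # -> u'3' (int and float compare equal)
#   safe_decode(3.5)             # -> u'3.5'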
def safe_encode(s, enc='utf-8', errors='strict'):
if isinstance(s, bytes):
return s
# Numeric values are encoded right away
try:
assert(int(s) == float(s))
return str(int(s)).encode()
except:
try:
return str(float(s)).encode()
except:
pass
return s.encode(enc, errors)
if py3:
safe_str = safe_decode
else:
safe_str = safe_encode
__all__ = ['py3', 'safe_decode', 'safe_encode', 'safe_str',
'universal_newline_mode']
if not py3:
__all__ += ['str', 'bytes']
else:
__all__ += ['basestring']
# datamatrix/monkeypatch.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
def _monkey_patch_matplotlib():
"""
visible: False
desc:
This patch decorates the is_string_like function of matplotlib, because
it considers BaseColumn objects to be strings, which causes trouble when
plotting.
"""
try:
from matplotlib.axes import _base
except ImportError:
return
from datamatrix._datamatrix._basecolumn import BaseColumn
def decorate(fnc):
def inner(obj):
if isinstance(obj, BaseColumn):
return False
return fnc(obj)
return inner
_base.is_string_like = decorate(_base.is_string_like)
_monkey_patch_matplotlib()
# datamatrix/dispatch.py
# -*- coding: utf-8 -*-
"""
This file is part of exparser.
exparser is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
exparser is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with exparser. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
from datamatrix.py3compat import *
from datamatrix import DataMatrix, _cache
import time
import warnings
def dispatch(dm, modules=[], full=[], cache_prefix='auto_cache.'):
"""
desc:
Executes an analysis loop, in which all functions that are specified on
the command line are executed. A function is executed if its name is
prefixed by `@` and if it is present in one of the helper modules.
Cachable functions are cached automatically. If a function returns a
DataMatrix, this replaces the current DataMatrix for subsequent
functions.
arguments:
dm: The DataMatrix to analyze.
keywords:
modules: A module or list of modules that contain the analysis
functions.
full: A list of functions or function names that make up the full
analysis pathway.
cache_prefix:
A prefix for the cacheid for cachable functions. The
function name will be appended.
"""
if not isinstance(modules, list):
modules = [modules]
if not modules:
raise Exception('No modules specified')
print('Dispatching ...')
t0 = time.time()
if '@full' in sys.argv:
print('Running full analysis pathway')
if not full:
raise Exception('No full analysis pathway specified')
for func in full:
dm = _callfunc(dm, modules, func, cache_prefix=cache_prefix)
else:
for func in sys.argv:
if not func[0] == '@':
continue
if ':redo' in func:
func = func.replace(':redo', '')
redo = True
else:
redo = False
dm = _callfunc(dm, modules, func, cache_prefix=cache_prefix, redo=redo)
print('Dispatch finished (%.2f s)' % (time.time() - t0))
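# A usage sketch for dispatch(); the module `analysis` and its function
# `parse` are hypothetical:
#
#   import analysis
#   dispatch(dm, modules=[analysis], full=['parse'])
#
# Running the script as `python main.py @parse` executes analysis.parse(dm),
# `@parse:redo` ignores an existing cache, and `@full` runs the functions
# listed in `full` in order.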
def waterfall(*pipeline):
"""
desc:
Implements a "cached waterfall", which is a series of cachable
operations which is executed from the last point onward that is not
cached.
argument-list:
pipeline: A list of (func, cacheid, kwdict) tuples. Here, func is a
cachable function, cacheid specifies the cacheid, and
kwdict is a dictionary of keyword arguments to be passed to func.
Each function except the first should take a DataMatrix as
the first argument. All functions should return a
DataMatrix.
returns:
type: DataMatrix
"""
print('Starting waterfall ...')
t0 = time.time()
todo = []
dm = None
for i, (func, cacheid, kwdict) in enumerate(pipeline[::-1]):
hascachefile, cachepath = _cache.cachefile(cacheid)
if not hascachefile:
todo.append( (func, cacheid, kwdict))
continue
print(u'-> Latest cache is %s' % func.__name__)
dm = _cache.readcache(cachepath)
break
for func, cacheid, kwdict in todo[::-1]:
if dm is None:
print(u'-> Running (entry point) %s' % func.__name__)
dm = func(cacheid=cacheid, **kwdict)
else:
print(u'-> Running %s' % func.__name__)
dm = func(dm, cacheid=cacheid, **kwdict)
print('Waterfall finished (%.2f s)' % (time.time() - t0))
return dm
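# A usage sketch for waterfall(); `parse` and `preprocess` are hypothetical
# @cached functions, and only the first entry is called without a DataMatrix:
#
#   dm = waterfall(
#       (parse, 'parse', {}),
#       (preprocess, 'preprocess', {'winlen': 10}),
#   )
#
# If the 'preprocess' cacheid already has a cachefile, parse() is skipped.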
# Private functions
def _callfunc(dm, modules, func, cache_prefix='auto_cache.', redo=False):
"""
desc:
Calls a single function from a module.
arguments:
dm: The DataMatrix to analyze.
modules: list of modules that may contain the function.
func: The function name.
keywords:
cache_prefix: A prefix for the cacheid for cachable functions. The
function name will be appended.
redo: Indicates whether functions should be redone, even if a
cache is available.
returns:
DataMatrix
"""
if func[0] == '@':
func = func[1:]
found = False
for mod in modules:
if hasattr(mod, func):
t1 = time.time()
if isinstance(func, basestring):
_func = getattr(mod, func)
else:
_func = func
if not redo and _cache.iscached(_func):
cacheid = cache_prefix + func
print('-> Calling %s.%s() [cacheid=%s]' \
% (mod.__name__, func, cacheid))
retval = _func(dm, cacheid=cacheid)
else:
print('-> Calling %s.%s() [uncached]' % (mod.__name__,
func))
retval = _func(dm)
if isinstance(retval, DataMatrix):
print('-> DataMatrix was modified')
dm = retval
print('-> Finished %s.%s() in %.2f s' % (mod.__name__, func,
time.time()-t1))
found = True
break # Break in case the same function occurs in multiple modules
if not found:
warnings.warn('Helper function %s does not exist' % func)
return dm
# datamatrix/plot.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
import os
import sys
import numpy as np
from matplotlib import pyplot as plt
from datamatrix.colors.tango import *
plotfolder = 'plot'
if '--clear-plot' in sys.argv and os.path.exists(plotfolder):
print('Removing plot folder (%s)' % plotfolder)
import shutil
shutil.rmtree(plotfolder)
plt.style.use('ggplot')
plt.rc('font', family='liberation sans', size=10)
# Some pre-defined sizes
xs = 4, 4
s = 6, 6
ws = 6, 3
r = 8, 8
w = 12, 8
h = 8, 12
l = 12, 12
xl = 16, 16
def new(size=r):
"""
desc:
Creates a new figure.
keywords:
size:
desc: The figure size.
type: tuple
returns:
A matplotlib figure.
"""
fig = plt.figure(figsize=size)
plt.subplots_adjust(left=.15, right=.9, bottom=.15, top=.9, wspace=.3,
hspace=.3)
return fig
def trace(series, x=None, color=blue[1], err=True, **kwdict):
"""
desc:
Creates an average-trace plot.
arguments:
series:
desc: The signal.
type: SeriesColumn
keywords:
x:
desc: An array for the X axis with the same length as series, or
None for a default axis.
type: [ndarray, None]
color:
desc: The color.
type: str
label:
desc: A label for the line, or None for no label.
type: [str, None]
"""
y = series.mean
ymin = y - series.std/np.sqrt(len(series))
ymax = y + series.std/np.sqrt(len(series))
if x is None:
x = np.arange(len(y))
if err:
plt.fill_between(x, ymin, ymax, color=color, alpha=.2)
plt.plot(x, y, color=color, **kwdict)
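# A usage sketch for trace(); assumes that dm.pupil is a SeriesColumn:
#
#   new(size=ws)
#   trace(dm.pupil, label='pupil size')
#   plt.legend()
#   save('pupil-trace')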
def threshold(a, y=1, min_length=1, **kwdict):
inhit = False
for x, hit in enumerate(a):
if not inhit and hit:
onset = x
inhit = True
if inhit and not hit:
if x-onset >= min_length:
plt.plot([onset, x], [y,y], **kwdict)
inhit = False
if inhit:
if x-onset >= min_length:
plt.plot([onset, x], [y,y], **kwdict)
def regress(x, y, annotate=True, symbol='.', linestyle='--',
symbolcolor=blue[1], linecolor=blue[1], label=None):
"""
desc:
Creates a regression plot.
arguments:
x:
desc: A column for the X data.
type: BaseColumn
y:
desc: A column for the Y data.
type: BaseColumn
keywords:
annotate:
desc: Indicates whether the correlation and p-value should be
marked in the plot.
type: bool
symbol: TODO
linestyle: TODO
symbolcolor: TODO
linecolor: TODO
label: TODO
returns:
desc: The regression parameters as a (slope, intercept, correlation,
p-value, standard error) tuple
type: tuple
"""
from scipy.stats import linregress
s, i, r, p, se = linregress(x, y)
plt.plot(x, y, symbol, color=symbolcolor)
xData = np.array([min(x), max(x)])
yData = i + s*xData
plt.plot(xData, yData, linestyle, color=linecolor, label=label)
if annotate:
plt.text(0.05, 0.95, 'r = %.3f, p = %.3f' % (r, p), ha='left', \
va='top', transform=plt.gca().transAxes)
return s, i, r, p, se
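# A usage sketch for regress(); assumes that dm.x and dm.y are numeric
# columns:
#
#   new()
#   s, i, r, p, se = regress(dm.x, dm.y, label='fit')
#   save('x-by-y')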
def save(name, folder=None, show=False, dpi=200):
"""
desc:
Saves the current figure to the correct folder, depending on the active
experiment.
arguments:
name:
desc: The name for the figure.
type: str
keywords:
folder:
desc: A name for a subfolder to save the plot or None to save
directly in the plotfolder.
type: [str, None]
show:
desc: Indicates whether the figure should be shown as well.
type: bool
dpi:
desc: The dots per inch to use for the png export.
type: int
"""
if folder is not None:
_plotfolder = os.path.join(plotfolder, folder)
else:
_plotfolder = plotfolder
try:
os.makedirs(os.path.join(_plotfolder, 'svg'))
except:
pass
try:
os.makedirs(os.path.join(_plotfolder, 'png'))
except:
pass
pathSvg = os.path.join(_plotfolder, 'svg', '%s.svg' % name)
pathPng = os.path.join(_plotfolder, 'png', '%s.png' % name)
plt.savefig(pathSvg)
plt.savefig(pathPng, dpi=dpi)
if show or '--show' in sys.argv:
plt.show()
else:
plt.clf()
# datamatrix/series.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
from datamatrix._datamatrix._seriescolumn import _SeriesColumn
from datamatrix import FloatColumn
from datamatrix.colors import tango
import numpy as np
from scipy.stats import nanmean, nanmedian, nanstd
from scipy.interpolate import interp1d
import warnings
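# endlock() right-aligns each row of a series: the last non-NaN sample is
# shifted to the final position, with NaN padding at the start. This can be
# used, for example, to lock traces to a response rather than to an onset.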
def endlock(series):
endlock_series = _SeriesColumn(series._datamatrix, series.depth)
endlock_series[:] = np.nan
for i in range(len(series)):
for j in range(series.depth-1, -1, -1):
if not np.isnan(series[i,j]):
break
endlock_series[i,-j-1:] = series[i,:j+1]
return endlock_series
def reduce_(series, operation=nanmean):
"""
desc:
Transforms series to single values by applying an operation (typically
a mean) to each series.
arguments:
series:
desc: The signal to reduce.
type: SeriesColumn
keywords:
operation:
desc: The operation function to use for the reduction. This
function should accept `series` as first argument, and
`axis=1` as keyword argument.
returns:
desc: A reduction of the signal.
type: FloatColumn
"""
col = FloatColumn(series._datamatrix)
try:
a = operation(series, axis=1)
except TypeError:
for i, val in enumerate(series):
col[i] = operation(val)
else:
col[:] = a
return col
def window(series, start=0, end=None):
"""
desc:
Extracts a window from a signal.
arguments:
series:
desc: The signal to get a window from.
type: SeriesColumn
keywords:
start:
desc: The window start.
type: int
end:
desc: The window end, or None to go to the signal end.
type: [int, None]
returns:
desc: A window of the signal.
type: SeriesColumn
"""
if end is None:
end = series.depth
a = series[:,start:end]
depth = a.shape[1]
window_series = _SeriesColumn(series._datamatrix, depth)
window_series[:] = a
return window_series
def baseline(series, baseline, bl_start=-100, bl_end=None, reduce_fnc=None):
"""
desc:
Applies a baseline to a signal.
arguments:
series:
desc: The signal to apply a baseline to.
type: SeriesColumn
baseline:
desc: The signal to use as a baseline.
type: SeriesColumn
keywords:
bl_start:
desc: The start of the window from `baseline` to use.
type: int
bl_end:
desc: The end of the window from `baseline` to use, or None to go
to the end.
type: [int, None]
reduce_fnc:
desc: The function to reduce the baseline epoch to a single value.
If None, np.nanmedian() is used.
type: [FunctionType, None]
returns:
desc: A baseline-corrected version of the signal.
type: SeriesColumn
"""
if reduce_fnc is None:
reduce_fnc = nanmedian
baseline = reduce_(window(baseline, start=bl_start, end=bl_end),
operation=reduce_fnc)
return series / baseline
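# A usage sketch for baseline(); assumes that dm.pupil is a SeriesColumn and
# that the first 100 samples form the baseline epoch:
#
#   pupil_bl = baseline(dm.pupil, dm.pupil, bl_start=0, bl_end=100)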
def blinkreconstruct(series, vt=5, maxdur=500, margin=10):
"""
Source:
Mathot, S. (2013). A simple way to reconstruct pupil size during eye
blinks. http://doi.org/10.6084/m9.figshare.688002
desc:
Reconstructs pupil size during blinks.
arguments:
series:
desc: A signal to reconstruct.
type: SeriesColumn
keywords:
vt:
desc: A pupil velocity threshold. Lower thresholds more easily
trigger blinks.
type: [int, float]
maxdur:
desc: The maximum duration (in samples) for a blink. Longer
blinks are not reconstructed.
type: int
margin:
desc: The margin to take around missing data.
type: int
returns:
desc: A reconstructed signal.
type: SeriesColumn
"""
return _apply_fnc(series, _blinkreconstruct, vt=vt, maxdur=maxdur,
margin=margin)
def smooth(series, winlen=11, wintype='hanning', correctlen=True):
"""
Source:
SciPy Cookbook signal-smoothing recipe.
desc:
Smooths a signal using a window with the requested size.
This method is based on the convolution of a scaled window with the
signal. The signal is prepared by introducing reflected copies of the
signal (with the window size) at both ends, so that transient parts are
minimized in the beginning and end of the output signal.
arguments:
series:
desc: A signal to smooth.
type: SeriesColumn
keywords:
winlen:
desc: The width of the smoothing window. This should be an odd
integer.
type: int
wintype:
desc: The type of window from 'flat', 'hanning', 'hamming',
'bartlett', 'blackman'. A flat window produces a moving
average smoothing.
type: str
correctlen:
desc: Indicates whether the returned signal should have the same
length as the input signal.
type: bool
returns:
desc: A smoothed signal.
type: SeriesColumn
"""
return _apply_fnc(series, _smooth, winlen=winlen, wintype=wintype,
correctlen=correctlen)
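# A usage sketch for smooth(); assumes that dm.pupil is a SeriesColumn:
#
#   pupil_smooth = smooth(dm.pupil, winlen=11, wintype='hanning')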
def threshold(series, fnc, min_length=1):
"""
desc:
Finds samples that satisfy some threshold criterion for a given period.
arguments:
series:
desc: A signal to threshold.
type: SeriesColumn
fnc:
desc: A function that takes a single value and returns True if
this value exceeds a threshold, and False otherwise.
type: FunctionType
keywords:
min_length:
desc: The minimum number of samples for which `fnc` must return
True.
type: int
returns:
desc: A series where 0 indicates below threshold, and 1 indicates
above threshold.
type: SeriesColumn
"""
threshold_series = _SeriesColumn(series._datamatrix, series.depth)
threshold_series[:] = 0
# First walk through all rows
for i, trace in enumerate(series):
# Then walk through all samples within a row
nhit = 0
for j, val in enumerate(trace):
hit = fnc(val)
if hit:
nhit += 1
continue
if nhit >= min_length:
threshold_series[i,j-nhit:j] = 1
nhit = 0
# A final run that extends to the end of the trace ends at j inclusive
if nhit >= min_length:
threshold_series[i,j-nhit+1:j+1] = 1
return threshold_series
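# A usage sketch for threshold(); marks runs of at least 10 consecutive
# samples that exceed 3 (dm.pupil is a hypothetical SeriesColumn):
#
#   hits = threshold(dm.pupil, lambda v: v > 3, min_length=10)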
# Private functions
def _apply_fnc(series, fnc, **kwdict):
"""
visible: False
desc:
Applies a function to each cell.
arguments:
series:
desc: A signal to apply the function to.
type: SeriesColumn
fnc:
desc: The function to apply.
keyword-dict:
kwdict: A dict with keyword arguments for fnc.
returns:
desc: A new signal.
type: SeriesColumn
"""
new_series = _SeriesColumn(series._datamatrix, depth=series.depth)
for i, cell in enumerate(series):
new_series[i] = fnc(cell, **kwdict)
return new_series
def _blinkreconstruct(a, vt=5, maxdur=500, margin=10, smooth_winlen=21,
std_thr=3):
"""
visible: False
desc:
Reconstructs a single array.
"""
# Create a copy of the signal, a smoothed version, and calculate the
# velocity profile.
a = np.copy(a)
try:
strace = _smooth(a, winlen=smooth_winlen)
except Exception as e:
warnings.warn(str(e))
strace = a
vtrace = strace[1:]-strace[:-1]
# Start blink detection
ifrom = 0
lblink = []
while True:
# The onset of the blink is the moment at which the pupil velocity
# exceeds the threshold.
l = np.where(vtrace[ifrom:] < -vt)[0]
if len(l) == 0:
break # No blink detected
istart = l[0]+ifrom
if ifrom == istart:
break
# The reversal period is the moment at which the pupil starts to dilate
# again with a velocity above threshold.
l = np.where(vtrace[istart:] > vt)[0]
if len(l) == 0:
ifrom = istart
continue
imid = l[0]+istart
# The end blink period is the moment at which the pupil velocity drops
# back to zero again.
l = np.where(vtrace[imid:] < 0)[0]
if len(l) == 0:
ifrom = imid
continue
iend = l[0]+imid
ifrom = iend
# We generally underestimate the blink period, so compensate for this
if istart-margin >= 0:
istart -= margin
if iend+margin < len(a):
iend += margin
# We don't accept blinks that are too long, because blinks are not
# generally very long (although they can be).
if iend-istart > maxdur:
ifrom = istart+maxdur//10
continue
lblink.append( (istart, iend) )
# Now reconstruct the trace during the blinks
for istart, iend in lblink:
# First create a list of (when possible) four data points that we can
# use for interpolation.
dur = iend - istart
l = []
if istart-dur >= 0:
l += [istart-dur]
l += [istart, iend]
if iend+dur < len(strace):
l += [iend+dur]
x = np.array(l)
# If the list is long enough we use cubic interpolation, otherwise we
# use linear interpolation
y = a[x]
if len(x) >= 4:
f2 = interp1d(x, y, kind='cubic')
else:
f2 = interp1d(x, y)
xInt = np.arange(istart, iend)
yInt = f2(xInt)
a[xInt] = yInt
# For all remaining gaps, replace them with the previous sample if available
b = np.where( (a < (a.mean()-std_thr*a.std())) \
| (a > (a.mean()+std_thr*a.std())) \
| np.isnan(a) )[0]
for i in b:
if i == 0:
continue
a[i] = a[i-1]
return a
def _smooth(a, winlen=11, wintype='hanning', correctlen=True):
"""
visible: False
desc:
Smooths a single array.
"""
if a.ndim != 1:
raise ValueError("smooth only accepts 1 dimension arrays.")
if a.size < winlen:
raise ValueError("Input vector needs to be bigger than window size.")
if winlen < 3:
return a
if wintype not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
raise ValueError(
"wintype should be one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
s = np.r_[a[winlen-1:0:-1], a, a[-1:-winlen:-1]]
if wintype == 'flat': #moving average
w = np.ones(winlen, 'd')
else:
func = getattr(np, wintype)
w = func(winlen)
y = np.convolve(w/w.sum(), s, mode='valid')
if correctlen:
y = y[(winlen//2-1):-(winlen//2)]
# The output array can be one shorter than the input array
if len(y) > len(a):
y = y[:len(a)]
elif len(y) < len(a):
raise Exception('The output array is too short!')
return y
# datamatrix/_cache.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
import os
import sys
import time
import pickle
import shutil
cache_initialized = False
skipcache = '--no-cache' in sys.argv
cachefolder = '.cache'
protocol = pickle.HIGHEST_PROTOCOL
def init_cache():
"""
desc:
Initializes the cache system.
"""
global cache_initialized
if cache_initialized:
return
cache_initialized = True
print(u'Initializing cache ...')
if '--clear-cache' in sys.argv and os.path.exists(cachefolder):
print(u'Removing cache folder (%s)' % cachefolder)
shutil.rmtree(cachefolder)
if not os.path.exists(cachefolder):
print(u'Creating cache folder (%s)' % cachefolder)
os.mkdir(cachefolder)
def cached(func):
"""
desc:
A decorator function that provides a cache for functions that return
a picklable value.
"""
def inner(*args, **kwargs):
iscached = True
if 'cacheid' in kwargs:
hascachefile, cachepath = cachefile(kwargs['cacheid'])
del kwargs['cacheid']
else:
cachepath = None
if skipcache or cachepath is None or not hascachefile:
print('@cached: calling %s' % func)
a = func(*args, **kwargs)
if cachepath is not None:
print('@cached: saving %s' % cachepath)
writecache(a, cachepath)
else:
ctime = time.ctime(os.path.getctime(cachepath))
print('@cached: loading %s (created %s)' % (cachepath, ctime))
a = readcache(cachepath)
return a
init_cache()
inner.__name__ = func.__name__
return inner
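# A usage sketch for @cached; the function body is hypothetical:
#
#   @cached
#   def parse():
#       return expensive_computation()
#
#   dm = parse(cacheid='parse')  # computed once, then read from the cache
#
# Without a cacheid keyword, the function is simply called every time.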
def iscached(func):
"""
desc:
Checks whether a function is cachable.
returns:
desc: True if cachable, False otherwise.
type: bool
"""
init_cache()
if py3:
return 'iscached' in func.__code__.co_varnames
return 'iscached' in func.func_code.co_varnames
def cachefile(cacheid):
"""
desc:
Gets the cachefile for a cacheid, and checks whether this file exists.
arguments:
cacheid: The cacheid.
returns:
A (cache_exists, cachepath) tuple, where the first is a boolean that
indicates if the second exists.
"""
init_cache()
path = os.path.join(cachefolder, cacheid) + '.pkl'
if os.path.exists(path):
return True, path
return False, path
def readcache(cachepath):
"""
desc:
Reads an object from a cachefile.
arguments:
cachepath: The full path to the cachefile.
returns:
An object that was cached.
"""
init_cache()
with open(cachepath, u'rb') as fd:
return pickle.load(fd)
def writecache(a, cachepath):
"""
desc:
Writes a cachefile for an object.
arguments:
a: The object to cache. This object should be pickleable.
cachepath: The full path to the cachefile.
"""
init_cache()
with open(cachepath, u'wb') as fd:
pickle.dump(a, fd, protocol)
# datamatrix/convert.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
from datamatrix import DataMatrix
try:
import pandas as pd
except ImportError:
pd = None
def wrap_pandas(fnc):
"""
visible: False
desc:
A decorator for pandas functions. It converts a DataMatrix to a
DataFrame, passes it to a function, and then converts the returned
DataFrame back to a DataMatrix.
"""
def inner(dm, *arglist, **kwdict):
df_in = to_pandas(dm)
df_out = fnc(df_in, *arglist, **kwdict)
return from_pandas(df_out)
inner.__doc__ = u'desc: A simple wrapper around the corresponding pandas function'
return inner
def to_pandas(dm):
"""
desc:
Converts a DataMatrix to a pandas DataFrame.
arguments:
dm:
type: DataMatrix
returns:
type: DataFrame
"""
d = {}
for colname, col in dm.columns:
d[colname] = list(col)
return pd.DataFrame(d)
def from_pandas(df):
"""
desc:
Converts a pandas DataFrame to a DataMatrix.
arguments:
df:
type: DataFrame
returns:
type: DataMatrix
"""
from datamatrix import operations as ops
dm = DataMatrix(length=len(df))
for colname in df.columns:
if isinstance(colname, tuple):
_colname = u'_'.join([str(i) for i in colname])
else:
_colname = colname
try:
exec('%s = None' % _colname)
except SyntaxError:
dm[u'_%s' % _colname] = df[colname]
else:
dm[_colname] = df[colname]
ops.auto_type(dm)
return dm
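# A round-trip sketch; the columns x, y, and z are hypothetical:
#
#   df = to_pandas(dm)
#   df['z'] = df['x'] + df['y']
#   dm = from_pandas(df)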
# datamatrix/operations.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
from datamatrix import DataMatrix, FloatColumn, IntColumn, SeriesColumn, \
MixedColumn
from datamatrix._datamatrix._seriescolumn import _SeriesColumn
from datamatrix._datamatrix._basecolumn import BaseColumn
import random
import warnings
try:
from datamatrix import convert
import pandas as pd
except ImportError as e:
pass
else:
pivot_table = convert.wrap_pandas(pd.pivot_table)
def weight(col):
"""
desc: |
Weights a DataMatrix by a column. That is, each row from a DataMatrix is
repeated as many times as the value in the weighting column.
For example:
A B
---
1 X
2 Y
>>> weight(dm.A)
A B
---
1 X
2 Y
2 Y
arguments:
col:
desc: The column to weight by.
type: BaseColumn
returns:
type: DataMatrix
"""
dm1 = col._datamatrix
dm2 = DataMatrix(length=int(col.sum))
# Use a separate loop variable so that `col` keeps referring to the
# weighting column
for colname, _col in dm1.columns:
dm2[colname] = type(_col)
i2 = 0
for i1, weight in enumerate(col):
if not isinstance(weight, int) or weight < 0:
raise TypeError(u'Weights should be non-negative integer values')
for c in range(weight):
for colname in dm1.column_names:
dm2[colname][i2] = dm1[colname][i1]
i2 += 1
return dm2
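# A usage sketch for weight(), building on the docstring example above:
#
#   from datamatrix import DataMatrix
#   dm = DataMatrix(length=2)
#   dm.A = 1, 2
#   dm.B = 'X', 'Y'
#   dm = weight(dm.A)  # 3 rows: (1, 'X'), (2, 'Y'), (2, 'Y')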
def split(col):
"""
desc:
Splits a DataMatrix by unique values in a column.
arguments:
col:
desc: The column to split by.
type: BaseColumn
returns:
desc: An iterator over (value, DataMatrix) tuples.
type: Iterator
"""
for val in col.unique:
yield val, col == val
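# A usage sketch for split(); assumes a column named condition:
#
#   for val, dm_ in split(dm.condition):
#       print(val, len(dm_))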
def tuple_split(col, *values):
"""
desc:
Splits a DataMatrix by values in a column, and returns the split as a
tuple of DataMatrix objects.
arguments:
col:
desc: The column to split by.
type: BaseColumn
argument-list:
values: A list of values to split by.
returns:
A tuple of DataMatrix objects.
example: |
dm1, dm2 = tuple_split(dm.col, 1, 2)
"""
n_total = len(col)
n_select = 0
l = []
for val in values:
dm = col == val
n = len(dm)
if not n:
warnings.warn('No matching rows for %s' % val)
n_select += n
l.append(dm)
if n_select != n_total:
warnings.warn('Some rows have not been selected')
return tuple(l)
def bin_split(col, bins):
"""
desc:
Splits a DataMatrix into bins; that is, the DataMatrix is first sorted
by a column, and then split into equal-size (or roughly equal-size)
bins.
arguments:
col:
desc: The column to split by.
type: BaseColumn
bins:
desc: The number of bins.
type: int
returns:
desc: A generator that iterates over the splits.
example: |
# Get the mean response time for 10 bins
for dm_ in op.bin_split(dm.response_time, bins=10):
print(dm_.response_time.mean)
"""
if len(col) < bins:
raise ValueError('More bins than rows')
dm = sort(col._datamatrix, by=col)
for i in range(bins):
start = int(len(dm)/bins*i)
end = int(len(dm)/bins*(i+1))
yield dm[start:end]
def fullfactorial(dm, ignore=u''):
"""
desc: |
*Requires numpy*
Creates a new DataMatrix that uses a specified DataMatrix as the base of
a full-factorial design. That is, each value of every row is combined
with each value from every other row. For example:
A B
---
x 3
y 4
>>> fullfactorial(dm)
A B
---
x 3
x 4
y 3
y 4
arguments:
dm:
desc: The source DataMatrix.
type: DataMatrix
keywords:
ignore: A value that should be ignored.
returns:
type: DataMatrix
"""
for colname, col in dm.columns:
if not isinstance(col, MixedColumn):
raise ValueError(u'fullfactorial only works with MixedColumns')
design = [len(col != ignore) for name, col in dm.columns]
a = _fullfact(design)
fdm = DataMatrix(a.shape[0])
for name in dm.column_names:
fdm[name] = u''
for i in range(a.shape[0]):
row = a[i]
for rownr, name in enumerate(dm.column_names):
fdm[name][i] = dm[name][int(row[rownr])]
return fdm
def group(dm, by=None):
"""
desc: |
*Requires numpy*
Groups the DataMatrix by unique values in a set of grouping columns.
Grouped columns are stored as SeriesColumns. The columns that are
grouped should contain numeric values.
For example:
A B
---
x 0
x 1
y 2
y 3
>>> group(dm, by=[dm.a])
Gives:
A B
---
x [0, 1]
y [2, 3]
arguments:
dm:
desc: The DataMatrix to group.
type: DataMatrix
keywords:
by: A list of columns to group by.
type: [list, None]
returns:
desc: A grouped DataMatrix.
type: DataMatrix
"""
import numpy as np
bycol = MixedColumn(datamatrix=dm)
if by is not None:
for col in by:
if col._datamatrix is not dm:
raise ValueError(u'By-columns are from a different DataMatrix')
bycol += col
keys = bycol.unique
groupcols = [(name, col) for name, col in dm.columns if col not in by]
nogroupcols = [(name, col) for name, col in dm.columns if col in by]
cm = DataMatrix(length=len(keys))
for name, col in groupcols:
if isinstance(col, _SeriesColumn):
warnings.warn(
u'Failed to create series for SeriesColumn %s' % name)
continue
cm[name] = SeriesColumn(depth=0)
for name, col in nogroupcols:
cm[name] = col.__class__
for i, key in enumerate(keys):
dm_ = bycol == key
for name, col in groupcols:
if isinstance(col, _SeriesColumn):
continue
if cm[name].depth < len(dm_[name]):
cm[name].defaultnan = True
cm[name].depth = len(dm_[name])
cm[name].defaultnan = False
try:
cm[name][i,:len(dm_[name])] = dm_[name]
except ValueError:
warnings.warn(
u'Failed to create series for MixedColumn %s' % name)
for name, col in nogroupcols:
cm[name][i] = dm_[name][0]
return cm
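# A usage sketch for group(); assumes columns named subject and rt:
#
#   gm = group(dm, by=[dm.subject])
#
# gm.rt is now a SeriesColumn with one row per subject; it can be reduced to
# per-subject means with datamatrix.series.reduce_().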
def sort(obj, by=None):
"""
desc:
Sorts a column or DataMatrix. In the case of a DataMatrix, a column must
be specified to determine the sort order. In the case of a column, this
needs to be specified if the column should be sorted by another column.
arguments:
obj:
type: [DataMatrix, BaseColumn]
by:
desc: The sort key, that is, the column that is used for sorting
the DataMatrix, or the other column.
type: BaseColumn
returns:
desc: The sorted DataMatrix, or the sorted column.
type: [DataMatrix, BaseColumn]
"""
if isinstance(obj, DataMatrix):
if by is None:
raise ValueError(
'The by keyword is required when sorting a DataMatrix')
return obj._selectrowid(by._sortedrowid())
if by is None:
by = obj
col = obj._getrowidkey(by._sortedrowid())
col._rowid = obj._rowid
return col
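# A usage sketch for sort(); assumes a column named rt:
#
#   dm = sort(dm, by=dm.rt)  # sort the full DataMatrix by rt
#   col = sort(dm.rt)        # sort a single column by its own values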
def shuffle(obj):
"""
desc:
Shuffles a DataMatrix or a column. If a DataMatrix is shuffled, the order
of the rows is shuffled, but values that were in the same row will stay
in the same row.
arguments:
obj:
type: [DataMatrix, BaseColumn]
returns:
desc: The shuffled DataMatrix or column.
type: [DataMatrix, BaseColumn]
"""
_rowid = list(obj._rowid)
random.shuffle(_rowid)
if isinstance(obj, DataMatrix):
return obj._selectrowid(_rowid)
col = obj._getrowidkey(_rowid)
col._rowid = obj._rowid
return col
def keep_only(dm, cols=[]):
"""
desc: |
Removes all columns from the DataMatrix, except those listed in `cols`.
*Note:* This modifies the DataMatrix in place.
arguments:
dm:
type: DataMatrix
keywords:
cols:
desc: A list of column names, or columns.
type: list
"""
colnames = []
for col in cols:
if isinstance(col, basestring):
colnames.append(col)
continue
if isinstance(col, BaseColumn):
colnames.append(col.name)
continue
raise ValueError(u'Expecting column names or BaseColumn objects')
for colname in dm.column_names:
if colname not in colnames:
del dm[colname]
def auto_type(dm):
"""
desc: |
Converts all columns of type MixedColumn to IntColumn if all values are
integers, or to FloatColumn if all values are numeric.
*Note:* This modifies the DataMatrix in place.
arguments:
dm:
type: DataMatrix
"""
for name, col in dm.columns:
if isinstance(col, (FloatColumn, IntColumn)):
continue
col_type = IntColumn
for val in col:
try:
assert(int(val) == float(val))
except:
try:
float(val)
col_type = FloatColumn
except:
break
else:
new_col = col_type(col._datamatrix)
new_col[:] = col
del dm[name]
dm[name] = new_col
dm._mutate()
# Private function
def _fullfact(levels):
"""
desc:
Taken from pydoe. See:
"""
import numpy as np
n = len(levels) # number of factors
nb_lines = np.prod(levels) # number of trial conditions
H = np.zeros((nb_lines, n))
level_repeat = 1
range_repeat = np.prod(levels)
for i in range(n):
range_repeat //= levels[i]
lvl = []
for j in range(levels[i]):
lvl += [j]*level_repeat
rng = lvl*range_repeat
level_repeat *= levels[i]
H[:, i] = rng
return H
# datamatrix/__init__.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
import datamatrix.monkeypatch
from datamatrix._datamatrix._row import Row
from datamatrix._datamatrix._mixedcolumn import MixedColumn
from datamatrix._datamatrix._numericcolumn import FloatColumn, IntColumn
from datamatrix._datamatrix._seriescolumn import SeriesColumn
from datamatrix._datamatrix._datamatrix import DataMatrix
from datamatrix._cache import cached, iscached
__version__ = '0.1.0'
# datamatrix/rbridge/lme4.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
import os
import time
import subprocess
from datamatrix import io, series, SeriesColumn, DataMatrix, cached
from datamatrix._datamatrix._seriescolumn import _SeriesColumn
from datamatrix.py3compat import *
@cached
def lmer(dm, formula):
cmd = u'''
library(lmerTest)
result <- lmer(%s)
s = summary(result)
s;
write.csv(s$coef, ".r-out.csv")
''' % formula
rm = _launchr(dm, cmd)
rm.rename(u'', u'effect')
rm.rename(u'Estimate', u'est')
rm.rename(u'Std. Error', u'se')
rm.rename(u't value', u't')
if u'Pr(>|t|)' in rm:
rm.rename(u'Pr(>|t|)', u'p')
else:
rm.p = -1
return rm
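# A usage sketch for lmer(); requires R with lmerTest installed, and assumes
# hypothetical columns rt, condition, and subject:
#
#   rm = lmer(dm, 'rt ~ condition + (1|subject)', cacheid='lmer.rt')
#
# The returned DataMatrix has effect, est, se, t, and p columns.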
@cached
def glmer(dm, formula, family):
cmd = u'''
library(lme4)
result <- glmer(%s, family="%s")
s = summary(result)
s;
write.csv(s$coef, ".r-out.csv")
''' % (formula, family)
rm = _launchr(dm, cmd)
rm.rename(u'', u'effect')
rm.rename(u'Estimate', u'est')
rm.rename(u'Std. Error', u'se')
rm.rename(u'z value', u'z')
if u'Pr(>|z|)' in rm:
rm.rename(u'Pr(>|z|)', u'p')
else:
rm.p = -1
return rm
@cached
def lmer_series(dm, formula, winlen=1):
col = formula.split()[0]
depth = dm[col].depth
rm = None
for i in range(0, depth, winlen):
wm = dm[:]
wm[col] = series.reduce_(
series.window(wm[col], start=i, end=i+winlen))
lm = lmer(wm, formula)
print('Sample %d' % i)
print(lm)
if rm is None:
rm = DataMatrix(length=len(lm))
rm.effect = list(lm.effect)
rm.p = SeriesColumn(depth=depth)
rm.t = SeriesColumn(depth=depth)
rm.est = SeriesColumn(depth=depth)
rm.se = SeriesColumn(depth=depth)
for lmrow, rmrow in zip(lm, rm):
rmrow.p[i:i+winlen] = lmrow.p
rmrow.t[i:i+winlen] = lmrow.t
rmrow.est[i:i+winlen] = lmrow.est
rmrow.se[i:i+winlen] = lmrow.se
return rm
def _launchr(dm, cmd):
dm = dm[:]
# SeriesColumns cannot be saved to a csv file, so we delete those first.
for name, col in dm.columns:
if isinstance(col, _SeriesColumn):
del dm[name]
# Write the data to an input file
io.writetxt(dm, u'.r-in.csv')
# Launch R, read the data, and communicate the commands
proc = subprocess.Popen( ['R', '--vanilla'], stdin=subprocess.PIPE)
# proc = subprocess.Popen( ['R', '--vanilla'], stdin=subprocess.PIPE,
# stdout=subprocess.PIPE, stderr=subprocess.PIPE)
cmd = u'data <- read.csv(".r-in.csv")\nattach(data)\n%s' % cmd
proc.communicate(safe_encode(cmd, u'ascii'))
# Wait until the output file has been generated and return it
while not os.path.exists(u'.r-out.csv'):
time.sleep(.5)
dm = io.readtxt(u'.r-out.csv')
return dm
# datamatrix/rbridge/__init__.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
import datamatrix.monkeypatch
from datamatrix._datamatrix._row import Row
from datamatrix._datamatrix._mixedcolumn import MixedColumn
from datamatrix._datamatrix._numericcolumn import FloatColumn, IntColumn
from datamatrix._datamatrix._seriescolumn import SeriesColumn
from datamatrix._datamatrix._datamatrix import DataMatrix
from datamatrix._cache import cached, iscached
__version__ = '0.1.0'
# datamatrix/_datamatrix/_numericcolumn.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
from datamatrix._datamatrix._basecolumn import BaseColumn
import operator
try:
import numpy as np
from scipy.stats import nanmean, nanmedian, nanstd
nan = np.nan
except ImportError:
np = None
nan = None
class NumericColumn(BaseColumn):
"""
desc:
A base class for FloatColumn and IntColumn. Don't use this class
directly.
"""
dtype = float
invalid = nan
def __init__(self, datamatrix):
if np is None:
raise Exception(u'NumPy and SciPy are required, but not installed.')
super(NumericColumn, self).__init__(datamatrix)
@property
def unique(self):
return np.unique(self._seq)
@property
def mean(self):
return nanmean(self._seq)
@property
def median(self):
return nanmedian(self._seq)
@property
def std(self):
return nanstd(self._seq)
@property
def max(self):
if not len(self._seq):
return np.nan
return np.nanmax(self._seq)
@property
def min(self):
if not len(self._seq):
return np.nan
return np.nanmin(self._seq)
@property
def sum(self):
if not len(self._seq):
return np.nan
return np.nansum(self._seq)
def _printable_list(self):
return list(self._seq)
def _init_rowid(self):
self._rowid = np.array(self._datamatrix._rowid, dtype=int)
def _init_seq(self):
self._seq = np.empty(len(self._datamatrix), dtype=self.dtype)
self._seq[:] = self.invalid
def _checktype(self, value):
try:
return float(value)
except:
return np.nan
def _tosequence(self, value, length):
if isinstance(value, basestring):
a = np.empty(length, dtype=self.dtype)
a[:] = np.nan
return a
return super(NumericColumn, self)._tosequence(value, length)
def _compare(self, other, op):
i = np.where(op(self._seq, other))[0]
return self._datamatrix._selectrowid(list(self._rowid[i]))
def _operate(self, other, number_op, str_op=None):
col = self._empty_col()
col._rowid = self._rowid
col._seq = number_op(self._seq, other)
return col
def _addrowid(self, _rowid):
old_length = len(self)
self._rowid = np.concatenate((self._rowid, _rowid))
a = np.empty(len(self._rowid), dtype=self.dtype)
a[:old_length] = self._seq
a[old_length:] = self.invalid
self._seq = a
def _getrowidkey(self, key):
# We need to select all rows that match the rowids specified in key,
# while preserving the order provided by key. To do this, we use the
# following logic:
# - Get a list of indices (`orig_indices`) that give a sorted view on
# self._rowid.
# - Use this to search through a sorted view of _rowid for all items in
# key
# - Map the matching indices, which refer to the sorted view of _rowid
# back to a list of indices in the original, non-sorted array.
# See also: http://stackoverflow.com/questions/9566592/\
# find-multiple-values-within-a-numpy-array
col = self._empty_col()
orig_indices = self._rowid.argsort()
matching_indices = np.searchsorted(self._rowid[orig_indices], key)
selected_indices = orig_indices[matching_indices]
col._rowid = self._rowid[selected_indices]
col._seq = self._seq[selected_indices]
return col
def _sortedrowid(self):
return list(self._rowid[self._seq.argsort()])
def _merge(self, other, _rowid):
col = self._empty_col()
i_other = ~np.in1d(other._rowid, self._rowid) \
& np.in1d(other._rowid, _rowid)
i_self = np.in1d(self._rowid, _rowid)
col._rowid = np.concatenate(
(self._rowid[i_self], other._rowid[i_other]))
col._seq = np.concatenate((self._seq[i_self], other._seq[i_other]))
return col._getrowidkey(_rowid)
class FloatColumn(NumericColumn):
"""
desc:
A column of numeric float values. Invalid values are marked as
numpy.nan.
"""
pass
class IntColumn(NumericColumn):
"""
desc:
A column of numeric int values. Does not support invalid values.
"""
dtype = int
invalid = 0
def _tosequence(self, value, length):
if not isinstance(value, basestring):
try:
value = list(value)
except:
pass
else:
return super(NumericColumn, self)._tosequence(value, length)
try:
value = int(value)
except:
raise TypeError(u'IntColumn expects integers!')
return super(NumericColumn, self)._tosequence(value, length)
def _checktype(self, value):
try:
return int(value)
except:
raise TypeError(u'IntColumn expects integers!')
def _operate(self, other, number_op, str_op=None):
col = super(IntColumn, self)._operate(other, number_op, str_op=None)
col._seq = col._seq.astype(self.dtype)
return col
def __div__(self, other):
return self._operate(other, operator.floordiv)
def __truediv__(self, other):
return self._operate(other, operator.floordiv)
# datamatrix/_datamatrix/_seriescolumn.py
# -*- coding: utf-8 -*-
"""
This file is part of datamatrix.
datamatrix is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
datamatrix is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with datamatrix. If not, see <http://www.gnu.org/licenses/>.
"""
from datamatrix.py3compat import *
# from datamatrix._datamatrix._basecolumn import BaseColumn
from datamatrix._datamatrix._numericcolumn import NumericColumn
try:
import numpy as np
from scipy.stats import nanmean, nanmedian, nanstd
except ImportError:
np = None
class _SeriesColumn(NumericColumn):
"""
desc:
A column in which each cell is a numeric series.
"""
dtype = float
def __init__(self, datamatrix, depth, defaultnan=False):
"""
desc:
Constructor. You generally don't call this constructor directly, but
use the SeriesColumn helper function.
arguments:
datamatrix:
desc: The DataMatrix to which this column belongs.
type: DataMatrix
depth:
desc: The depth, i.e. the number of values per cell.
type: int
"""
if np is None:
raise Exception(u'NumPy and SciPy are required, but not installed.')
self._depth = depth
self.defaultnan = defaultnan
NumericColumn.__init__(self, datamatrix)
def setallrows(self, value):
"""
desc:
Sets all rows to a value, or series of values.
arguments:
value: A value, or series of values that has the same length as the
depth of the column.
"""
value = self._checktype(value)
self._seq[:] = value
@property
def unique(self):
raise NotImplementedError(u'unique is not implemented for SeriesColumn')
@property
def depth(self):
"""
name: depth
desc:
A property to access and change the depth of the column.
"""
return self._depth
@depth.setter
def depth(self, depth):
if depth == self._depth:
return
if depth > self._depth:
seq = np.zeros( (len(self), depth), dtype=self.dtype)
if self.defaultnan:
seq[:] = np.nan
seq[:,:self._depth] = self._seq
self._seq = seq
self._depth = depth
return
self._depth = depth
self._seq = self._seq[:,:depth]
@property
def plottable(self):
"""
name: plottable
desc:
Gives a view of the traces where the axes have been swapped. This is
the format that matplotlib.pyplot.plot() expects.
"""
return np.swapaxes(self._seq, 0, 1)
@property
def mean(self):
return nanmean(self._seq, axis=0)
@property
def median(self):
return nanmedian(self._seq, axis=0)
@property
def std(self):
return nanstd(self._seq, axis=0)
@property
def max(self):
return np.nanmax(self._seq, axis=0)
@property
def min(self):
return np.nanmin(self._seq, axis=0)
@property
def sum(self):
return np.nansum(self._seq, axis=0)
# Private functions
def _init_seq(self):
self._seq = np.zeros( (len(self._datamatrix), self._depth),
dtype=self.dtype)
if self.defaultnan:
self._seq[:] = np.nan
def _ellipsize(self, a):
"""
visible: False
desc:
Creates an ellipsized representation of an array.
arguments:
a: An array.
returns:
A string with an ellipsized representation.
"""
return u'%s ... %s' % (str(a[:2])[:-1], str(a[-2:])[1:])
def _printable_list(self):
if self._depth <= 4:
return list(self._seq)
return [self._ellipsize(cell) for cell in self]
def _operate(self, a, number_op, str_op=None):
# For a 1D array with the length of the datamatrix, we create an array
# in which the second dimension (i.e. the depth) is constant. This
# allows us to do by-row operations.
if isinstance(a, (list, tuple)):
a = np.array(a, dtype=self.dtype)
if isinstance(a, NumericColumn):
a = np.array(a._seq)
if isinstance(a, np.ndarray) and a.shape == (len(self), ):
a2 = np.empty( (len(self), self._depth),
dtype=self.dtype)
np.rot90(a2)[:] = a
a = a2
col = self._empty_col()
col._rowid = self._rowid
col._seq = number_op(self._seq, a)
return col
def _checktype(self, value):
try:
a = np.empty(self._depth, dtype=self.dtype)
a[:] = value
except:
raise Exception('Invalid type: %s' % str(value))
return a
def _tosequence(self, value, length):
# For float and integers, we simply create a new (length, depth) array
# with only this value
if isinstance(value, (float, int)):
a = np.empty( (len(self._datamatrix), self._depth),
dtype=self.dtype)
a[:] = value
return a
try:
a = np.array(value, dtype=self.dtype)
except:
raise Exception('Cannot convert to sequence: %s' % str(value))
# For a 1D array with the length of the datamatrix, we create an array
# in which the second dimension (i.e. the depth) is constant.
if a.shape == (length, ):
a2 = np.empty( (length, self._depth), dtype=self.dtype)
np.rot90(a2)[:] = a
return a2
# For a 2D array that already has the correct dimensions, we return it.
if a.shape == (length, self._depth):
return a
raise Exception('Cannot convert to sequence: %s' % str(value))
def _empty_col(self):
return self.__class__(self._datamatrix, depth=self._depth)
def _addrowid(self, _rowid):
old_length = len(self)
self._rowid = np.concatenate((self._rowid, _rowid))
a = np.zeros( (len(self._rowid), self._depth), dtype=self.dtype)
a[:old_length] = self._seq
self._seq = a
# Implemented syntax
def __getitem__(self, key):
if isinstance(key, tuple) and len(key) == 2:
return self._seq[key].copy()
return super(_SeriesColumn, self).__getitem__(key)
def __setitem__(self, key, value):
if isinstance(key, tuple) and len(key) == 2:
self._seq[key] = value
return
return super(_SeriesColumn, self).__setitem__(key, value)
def SeriesColumn(depth, defaultnan=False):
return _SeriesColumn, {'depth' : depth, u'defaultnan' : defaultnan}