PKdhHh{datamatrix/py3compat.py#-*- coding:utf-8 -*- """ This file is part of OpenSesame. OpenSesame is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OpenSesame is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenSesame. If not, see . """ import sys if sys.version_info >= (3,0,0): py3 = True basestring = str universal_newline_mode = u'r' else: bytes = str str = unicode py3 = False universal_newline_mode = u'rU' def safe_decode(s, enc='utf-8', errors='strict'): if isinstance(s, str): return s if isinstance(s, bytes): return s.decode(enc, errors) # Numeric values are encoded right away try: assert(int(s) == float(s)) return str(int(s)) except: try: return str(float(s)) except: pass # Some types need to be converted to unicode, but require the encoding # and errors parameters. Notable examples are Exceptions, which have # strange characters under some locales, such as French. It even appears # that, at least in some cases, they have to be encodeed to str first. # Presumably, there is a better way to do this, but for now this at # least gives sensible results. try: return safe_decode(bytes(s), enc=enc, errors=errors) except: pass # For other types, the unicode representation doesn't require a specific # encoding. This mostly applies to non-stringy things, such as integers. return str(s) def safe_encode(s, enc='utf-8', errors='strict'): if isinstance(s, bytes): return s # Numeric values are encoded right away try: assert(int(s) == float(s)) return str(int(s)).encode() except: try: return str(float(s)).encode() except: pass return s.encode(enc, errors) if py3: safe_str = safe_decode else: safe_str = safe_encode __all__ = ['py3', 'safe_decode', 'safe_encode', 'safe_str', 'universal_newline_mode'] if not py3: __all__ += ['str', 'bytes'] else: __all__ += ['basestring'] PK~#Hݴwdatamatrix/monkeypatch.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ def _monkey_patch_matplotlib(): """ visible: False desc: This patch decorates the is_string_like function of matplotlib, because this consider BaseColumn objects to be strings, with causes trouble when plotting. """ try: from matplotlib.axes import _base except ImportError: return from datamatrix._datamatrix._basecolumn import BaseColumn def decorate(fnc): def inner(obj): if isinstance(obj, BaseColumn): return False return fnc(obj) return inner _base.is_string_like = decorate(_base.is_string_like) _monkey_patch_matplotlib() PK\PPH))datamatrix/dispatch.py#-*- coding:utf-8 -*- """ This file is part of exparser. exparser is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. exparser is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with exparser. If not, see . """ import sys from datamatrix.py3compat import * from datamatrix import DataMatrix, _cache import time import warnings def dispatch(dm, modules=[], full=[], cache_prefix='auto_cache.'): """ desc: Executes an analysis loop, in which all functions that specified on the command are executed. A function is executed if its name is prefixed by `@` and if it is present in one of the helpers modules. Cachable functions are cached automatically. If a function returns a DataMatrix, this is used to replace the current DataMatrix for the following functions. arguments: dm: The DataMatrix to analyze. keywords: modules: A module or list of modules that contain the analysis functions. full: A list of functions or function names that make up the full analysis pathway. cache_prefix: A prefix for the cacheid for cachable functions. The function name will be appended. """ if not isinstance(modules, list): modules = [modules] if not modules: raise Exception('No modules specified') print('Dispatching ...') t0 = time.time() if '@full' in sys.argv: print('Running full analysis pathway') if not full: raise Exception('No full analysis pathway specified') for func in full: dm = _callfunc(dm, modules, func, cache_prefix=cache_prefix) else: for func in sys.argv: if not func[0] == '@': continue if ':redo' in func: func = func.replace(':redo', '') redo = True else: redo = False dm = _callfunc(dm, modules, func, cache_prefix=cache_prefix, redo=redo) print('Dispatch finished (%.2f s)' % (time.time() - t0)) def waterfall(*pipeline): """ desc: Implements a "cached waterfall", which is a series of cachable operations which is executed from the last point onward that is not cached. argument-list: pipeline: A list of (func, cacheid, kwdict) tuples. Here, func is a cachable function, cacheid specifies the cacheid, and kwdict is dictionary of keyword arguments to passed to func. Each function except the first should take a DataMatrix as the first argument. All functions should return a DataMatrix. returns: type: DataMatrix """ print('Starting waterfall ...') t0 = time.time() todo = [] dm = None for i, (func, cacheid, kwdict) in enumerate(pipeline[::-1]): hascachefile, cachepath = _cache.cachefile(cacheid) if not hascachefile: todo.append( (func, cacheid, kwdict)) continue print(u'-> Latest cache is %s' % func.__name__) dm = _cache.readcache(cachepath) break for func, cacheid, kwdict in todo[::-1]: if dm is None: print(u'-> Running (entry point) %s' % func.__name__) dm = func(cacheid=cacheid, **kwdict) else: print(u'-> Running %s' % func.__name__) dm = func(dm, cacheid=cacheid, **kwdict) print('Waterfall finished (%.2f s)' % (time.time() - t0)) return dm # Private functions def _callfunc(dm, modules, func, cache_prefix='auto_cache.', redo=False): """ desc: Calls a single function from a module. arguments: dm: The DataMatrix to analyze. modules: list of modules that may contain the function. func: The function name. keywords: cache_prefix: A prefix for the cacheid for cachable functions. The function name will be appended. redo: Indicates whether functions should be redone, even if a cache is available. returns: DataMatrix """ if func[0] == '@': func = func[1:] found = False for mod in modules: if hasattr(mod, func): t1 = time.time() if isinstance(func, basestring): _func = getattr(mod, func) else: _func = func if not redo and _cache.iscached(_func): cacheid = cache_prefix + func print('-> Calling %s.%s() [cacheid=%s]' \ % (mod.__name__, func, cacheid)) retval = _func(dm, cacheid=cacheid) else: print('-> Calling %s.%s() [uncached]' % (mod.__name__, func)) retval = _func(dm) if isinstance(retval, DataMatrix): print('-> DataMatrix was modified') dm = retval print('-> Finished %s.%s() in %.2f s' % (mod.__name__, func, time.time()-t1)) found = True break # Break in case the same function occurs in multiple modules if not found: warnings.warn('Helper function %s does not exist' % func) return dm PKDRkH ˱datamatrix/plot.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ import os import sys import numpy as np from matplotlib import pyplot as plt from datamatrix.colors.tango import * plotfolder = 'plot' if '--clear-plot' in sys.argv and os.path.exists(plotfolder): print('Removing plot folder (%s)' % plotfolder) import shutil shutil.rmtree(plotfolder) plt.style.use('ggplot') plt.rc('font', family='liberation sans', size=10) # Some pre-defined sizes xs = 4, 4 s = 6, 6 ws = 6, 3 r = 8, 8 w = 12, 8 h = 8, 12 l = 12, 12 xl = 16, 16 def new(size=r): """ desc: Creates a new figure. keywords: size: desc: The figure size. type: tuple returns: A matplotlib figure. """ fig = plt.figure(figsize=size) plt.subplots_adjust(left=.15, right=.9, bottom=.15, top=.9, wspace=.3, hspace=.3) return fig def trace(series, x=None, color=blue[1], err=True, **kwdict): """ desc: Creates an average-trace plot. arguments: series: desc: The signal. type: SeriesColumn keywords: x: desc: An array for the X axis with the same length as series, or None for a default axis. type: [ndarray, None] color: desc: The color. type: str label: desc: A label for the line, or None for no label. type: [str, None] """ y = series.mean ymin = y - series.std/np.sqrt(len(series)) ymax = y + series.std/np.sqrt(len(series)) if x is None: x = np.arange(len(y)) if err: plt.fill_between(x, ymin, ymax, color=color, alpha=.2) plt.plot(x, y, color=color, **kwdict) def threshold(a, y=1, min_length=1, **kwdict): inhit = False for x, hit in enumerate(a): if not inhit and hit: onset = x inhit = True if inhit and not hit: if x-onset >= min_length: plt.plot([onset, x], [y,y], **kwdict) inhit = False if inhit: if x-onset >= min_length: plt.plot([onset, x], [y,y], **kwdict) def regress(x, y, annotate=True, symbol='.', linestyle='--', symbolcolor=blue[1], linecolor=blue[1], label=None): """ desc: Creates a regression plot. arguments: x: desc: A column for the X data. type: BaseColumn y: desc: A column for the Y data. type: BaseColumn keywords: annotate: desc: Indicates whether the correlation and p-value should be marked in the plot. type: bool symbol: TODO linestyle: TODO symbolcolor: TODO linecolor: TODO label: TODO returns: desc: The regression parameters as a (slope, intercept, correlation, p-value, standard error) tuple type: tuple """ from scipy.stats import linregress s, i, r, p, se = linregress(x, y) plt.plot(x, y, symbol, color=symbolcolor) xData = np.array([min(x), max(x)]) yData = i + s*xData plt.plot(xData, yData, linestyle, color=linecolor, label=label) if annotate: plt.text(0.05, 0.95, 'r = %.3f, p = %.3f' % (r, p), ha='left', \ va='top', transform=plt.gca().transAxes) return s, i, r, p, se def save(name, folder=None, show=False, dpi=200): """ desc: Saves the current figure to the correct folder, depending on the active experiment. arguments: name: desc: The name for the figure. type: bool keywords: folder: desc: A name for a subfolder to save the plot or None to save directly in the plotfolder. type: [str, None] show: desc: Indicates whether the figure should be shown as well. type: bool dpi: desc: The dots per inch to use for the png export. type: int """ if folder != None: _plotfolder = os.path.join(plotfolder, folder) else: _plotfolder = plotfolder try: os.makedirs(os.path.join(_plotfolder, 'svg')) except: pass try: os.makedirs(os.path.join(_plotfolder, 'png')) except: pass pathSvg = os.path.join(_plotfolder, 'svg', '%s.svg' % name) pathPng = os.path.join(_plotfolder, 'png', '%s.png' % name) plt.savefig(pathSvg) plt.savefig(pathPng, dpi=dpi) if show or '--show' in sys.argv: plt.show() else: plt.clf() PKSRrH 4z8(8(datamatrix/series.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix._datamatrix._seriescolumn import _SeriesColumn from datamatrix import FloatColumn from datamatrix.colors import tango import numpy as np from scipy.stats import nanmean, nanmedian, nanstd from scipy.interpolate import interp1d import warnings def endlock(series): endlock_series = _SeriesColumn(series._datamatrix, series.depth) endlock_series[:] = np.nan for i in range(len(series)): for j in range(series.depth-1, -1, -1): if not np.isnan(series[i,j]): break endlock_series[i,-j-1:] = series[i,:j+1] return endlock_series def reduce_(series, operation=nanmean): """ desc: Transforms series to single values by applying an operation (typically a mean) to each series. arguments: series: desc: The signal to reduce. type: SeriesColumn keywords: operation: desc: The operation function to use for the reduction. This function should accept `series` as first argument, and `axis=1` as keyword argument. returns: desc: A reduction of the signal. type: FloatColumn """ col = FloatColumn(series._datamatrix) try: a = operation(series, axis=1) except TypeError: for i, val in enumerate(series): col[i] = operation(val) else: col[:] = a return col def window(series, start=0, end=None): """ desc: Extracts a window from a signal. arguments: series: desc: The signal to get a window from. type: SeriesColumn keywords: start: desc: The window start. type: int end: desc: The window end, or None to go to the signal end. type: [int, None] returns: desc: A window of the signal. type: SeriesColumn """ if end is None: end = series.depth a = series[:,start:end] depth = a.shape[1] window_series = _SeriesColumn(series._datamatrix, depth) window_series[:] = a return window_series def baseline(series, baseline, bl_start=-100, bl_end=None, reduce_fnc=None): """ desc: Applies a baseline to a signal arguments: series: desc: The signal to apply a baseline to. type: SeriesColumn baseline: desc: The signal to use as a baseline to. type: SeriesColumn keywords: bl_start: desc: The start of the window from `baseline` to use. type: int bl_end: desc: The end of the window from `baseline` to use, or None to go to the end. type: [int, None] reduce_fnc: desc: The function to reduce the baseline epoch to a single value. If None, np.nanmedian() is used. type: [FunctionType, None] returns: desc: A baseline-correct version of the signal. type: SeriesColumn """ if reduce_fnc is None: reduce_fnc = nanmedian baseline = reduce_(window(baseline, start=bl_start, end=bl_end), operation=reduce_fnc) return series / baseline def blinkreconstruct(series, vt=5, maxdur=500, margin=10): """ Source: Mathot, S. (2013). A simple way to reconstruct pupil size during eye blinks. http://doi.org/10.6084/m9.figshare.688002 desc: Reconstructs pupil size during blinks. arguments: series: desc: A signal to reconstruct. type: SeriesColumn keywords: vt: desc: A pupil velocity threshold. Lower tresholds more easily trigger blinks. type: [int, float] maxdur: desc: The maximum duration (in samples) for a blink. Longer blinks are not reconstructed. type: int margin: desc: The margin to take around missing data. type: int returns: desc: A reconstructed singal. type: SeriesColumn """ return _apply_fnc(series, _blinkreconstruct, vt=vt, maxdur=500, margin=margin) def smooth(series, winlen=11, wintype='hanning', correctlen=True): """ desc: Source: Smooths a signal using a window with requested size. This method is based on the convolution of a scaled window with the signal. The signal is prepared by introducing reflected copies of the signal (with the window size) in both ends so that transient parts are minimized in the begining and end part of the output signal. arguments: series: desc: A signal to smooth. type: SeriesColumn keywords: winlen: desc: The width of the smoothing window. This should be an odd integer. type: int wintype: desc: The type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'. A flat window produces a moving average smoothing. type: str correctlen: desc: Indicates whether the return string should be the same length as the input string. type: bool returns: desc: A smoothed signal. type: SeriesColumn """ return _apply_fnc(series, _smooth, winlen=winlen, wintype=wintype, correctlen=correctlen) def threshold(series, fnc, min_length=1): """ desc: Finds samples that satisfy some threshold criterion for a given period. arguments: series: desc: A signal to threshold. type: SeriesColumn fnc: desc: A function that takes a single value and returns True if this value exceeds a threshold, and False otherwise. type: FunctionType keywords: min_length: desc: The minimum number of samples for which `fnc` must return True. type: int returns: desc: A series where 0 indicates below threshold, and 1 indicates above threshold. type: SeriesColumn """ threshold_series = _SeriesColumn(series._datamatrix, series.depth) threshold_series[:] = 0 # First walk through all rows for i, trace in enumerate(series): print() # Then walk through all samples within a row nhit = 0 for j, val in enumerate(trace): hit = fnc(val) if hit: nhit += 1 continue if nhit >= min_length: threshold_series[i,j-nhit:j] = 1 nhit = 0 if nhit >= min_length: threshold_series[i,j-nhit:j] = 1 return threshold_series # Private functions def _apply_fnc(series, fnc, **kwdict): """ visible: False desc: Applies a function to each cell. arguments: series: desc: A signal to apply the function to. type: SeriesColumn fnc: desc: The function to apply. keyword-dict: kwdict: A dict with keyword arguments for fnc. returns: desc: A new signal. type: SeriesColumn """ new_series = _SeriesColumn(series._datamatrix, depth=series.depth) for i, cell in enumerate(series): new_series[i] = fnc(cell, **kwdict) return new_series def _blinkreconstruct(a, vt=5, maxdur=500, margin=10, smooth_winlen=21, std_thr=3): """ visible: False desc: Reconstructs a single array. """ # Create a copy of the signal, a smoothed version, and calculate the # velocity profile. a = np.copy(a) try: strace = _smooth(a, winlen=smooth_winlen) except Exception as e: warnings.warn(str(e)) strace = a vtrace = strace[1:]-strace[:-1] # Start blink detection ifrom = 0 lblink = [] while True: # The onset of the blink is the moment at which the pupil velocity # exceeds the threshold. l = np.where(vtrace[ifrom:] < -vt)[0] if len(l) == 0: break # No blink detected istart = l[0]+ifrom if ifrom == istart: break # The reversal period is the moment at which the pupil starts to dilate # again with a velocity above threshold. l = np.where(vtrace[istart:] > vt)[0] if len(l) == 0: ifrom = istart continue imid = l[0]+istart # The end blink period is the moment at which the pupil velocity drops # back to zero again. l = np.where(vtrace[imid:] < 0)[0] if len(l) == 0: ifrom = imid continue iend = l[0]+imid ifrom = iend # We generally underestimate the blink period, so compensate for this if istart-margin >= 0: istart -= margin if iend+margin < len(a): iend += margin # We don't accept blinks that are too long, because blinks are not # generally very long (although they can be). if iend-istart > maxdur: ifrom = istart+maxdur//10 continue lblink.append( (istart, iend) ) # Now reconstruct the trace during the blinks for istart, iend in lblink: # First create a list of (when possible) four data points that we can # use for interpolation. dur = iend - istart l = [] if istart-dur >= 0: l += [istart-dur] l += [istart, iend] if iend+dur < len(strace): l += [iend+dur] x = np.array(l) # If the list is long enough we use cubic interpolation, otherwise we # use linear interpolation y = a[x] if len(x) >= 4: f2 = interp1d(x, y, kind='cubic') else: f2 = interp1d(x, y) xInt = np.arange(istart, iend) yInt = f2(xInt) a[xInt] = yInt # For all remaining gaps, replace them with the previous sample if available b = np.where( (a < (a.mean()-std_thr*a.std())) \ | (a.mean() > (a+std_thr*a.std())) \ | np.isnan(a) )[0] for i in b: if i == 0: continue a[i] = a[i-1] return a def _smooth(a, winlen=11, wintype='hanning', correctlen=True): """ visible: False desc: Smooths a single array. """ if a.ndim != 1: raise ValueError("smooth only accepts 1 dimension arrays.") if a.size < winlen: raise ValueError("Input vector needs to be bigger than window size.") if winlen < 3: return a if not wintype in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']: raise ValueError( "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'") s = np.r_[a[winlen-1:0:-1], a, a[-1:-winlen:-1]] if wintype == 'flat': #moving average w = np.ones(winlen, 'd') else: func = getattr(np, wintype) w = func(winlen) y = np.convolve(w/w.sum(), s, mode='valid') if correctlen: y = y[(winlen/2-1):-(winlen/2)] # The output array can be one shorter than the input array if len(y) > len(a): y = y[:len(a)] elif len(y) < len(a): raise Exception('The output array is too short!') return y PK|MPH[qٞ datamatrix/_cache.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * import os import sys import time import pickle import shutil cache_initialized = False skipcache = '--no-cache' in sys.argv cachefolder = '.cache' protocol = pickle.HIGHEST_PROTOCOL def init_cache(): """ desc: Initializes the cache system. """ global cache_initialized if cache_initialized: return cache_initialized = True print(u'Initializing cache ...') if '--clear-cache' in sys.argv and os.path.exists(cachefolder): print(u'Removing cache folder (%s)' % cachefolder) shutil.rmtree(cachefolder) if not os.path.exists(cachefolder): print(u'Creating cache folder (%s)' % cachefolder) os.mkdir(cachefolder) def cached(func): """ desc: A decorator function that provides a cache for functions that return a pickable value. """ def inner(*args, **kwargs): iscached = True if 'cacheid' in kwargs: hascachefile, cachepath = cachefile(kwargs['cacheid']) del kwargs['cacheid'] else: cachepath = None if skipcache or cachepath is None or not hascachefile: print('@cached: calling %s' % func) a = func(*args, **kwargs) if cachepath is not None: print('@cached: saving %s' % cachepath) writecache(a, cachepath) else: ctime = time.ctime(os.path.getctime(cachepath)) print('@cached: loading %s (created %s)' % (cachepath, ctime)) a = readcache(cachepath) return a init_cache() inner.__name__ = func.__name__ return inner def iscached(func): """ desc: Checks whether a function is cachable. returns: desc: True if cachable, False otherwise. type: false """ init_cache() if py3: return 'iscached' in func.__code__.co_varnames return 'iscached' in func.func_code.co_varnames def cachefile(cacheid): """ desc: Gets the cachefile for a cacheid, and checks whether this file exists. arguments: cacheid: The cacheid. returns: A (cache_exists, cachepath) tuple, where the first is a boolean that indicates if the second exists. """ init_cache() path = os.path.join(cachefolder, cacheid) + '.pkl' if os.path.exists(path): return True, path return False, path def readcache(cachepath): """ desc: Reads an object from a cachefile. arguments: cachepath: The full path to the cachefile. returns: An object that was cached. """ init_cache() with open(cachepath, u'rb') as fd: return pickle.load(fd) def writecache(a, cachepath): """ desc: Writes a cachefile for an object. arguments: a: The object to cache. This object should be pickleable. cachepath: The full path to the cachefile. """ init_cache() with open(cachepath, u'wb') as fd: pickle.dump(a, fd, protocol) PKffkH4Idatamatrix/convert.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix import DataMatrix try: import pandas as pd except ImportError: pd = None def wrap_pandas(fnc): """ visible: False desc: A decorator for pandas functions. It converts a DataMatrix to a DataFrame, passes it to a function, and then converts the returned DataFrame back to a DataMatrix. """ def inner(dm, *arglist, **kwdict): df_in = to_pandas(dm) df_out = fnc(df_in, *arglist, **kwdict) return from_pandas(df_out) inner.__doc__ = u'desc: A simple wrapper around the corresponding pandas function' return inner def to_pandas(dm): """ desc: Converts a DataMatrix to a pandas DataFrame. arguments: dm: type: DataMatrix returns: type: DataFrame """ d = {} for colname, col in dm.columns: d[colname] = list(col) return pd.DataFrame(d) def from_pandas(df): """ desc: Converts a pandas DataFrame to a DataMatrix. arguments: dm: type: DataFrame returns: type: DataMatrix """ from datamatrix import operations as ops dm = DataMatrix(length=len(df)) for colname in df.columns: if isinstance(colname, tuple): _colname = u'_'.join([str(i) for i in colname]) else: _colname = colname try: exec('%s = None' % _colname) except SyntaxError: dm[u'_%s' % _colname] = df[colname] else: dm[_colname] = df[colname] ops.auto_type(dm) return dm PKv|H0))datamatrix/operations.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix import DataMatrix, FloatColumn, IntColumn, SeriesColumn, \ MixedColumn from datamatrix._datamatrix._seriescolumn import _SeriesColumn from datamatrix._datamatrix._basecolumn import BaseColumn import random import warnings try: from datamatrix import convert import pandas as pd except ImportError as e: pass else: pivot_table = convert.wrap_pandas(pd.pivot_table) def weight(col): """ desc: | Weights a DataMatrix by a column. That is, each row from a DataMatrix is repeated as many times as the value in the weighting column. For example: A B --- 1 X 2 Y >>> weight(dm.A) A B --- 1 X 2 Y 2 Y arguments: col: desc: The column to weight by. type: BaseColumn returns: type: DataMatrix """ dm1 = col._datamatrix dm2 = DataMatrix(length=int(col.sum)) for colname, col in dm1.columns: dm2[colname] = type(col) i2 = 0 for i1, weight in enumerate(col): if not isinstance(weight, int) or weight < 0: raise TypeError(u'Weights should be non-negative integer values') for c in range(weight): for colname in dm1.column_names: dm2[colname][i2] = dm1[colname][i1] i2 += 1 return dm2 def split(col): """ desc: Splits a DataMatrix by unique values in a column. arguments: col: desc: The column to split by. type: BaseColumn returns: desc: A iterator over (value, DataMatrix) tuples. type: Iterator """ for val in col.unique: yield val, col == val def tuple_split(col, *values): """ desc: Splits a DataMatrix by values in a column, and returns the split as a tuple of DataMatrix objects. arguments: col: desc: The column to split by. type: BaseColumn argument-list: values: A list values to split. returns: A tuple of DataMatrix objects. example: | dm1, dm2 = tuple_split(dm.col, 1, 2) """ n_total = len(col) n_select = 0 l = [] for val in values: dm = col == val n = len(dm) if not n: warnings.warn('No matching rows for %s' % val) n_select += n l.append(dm) if n_select != n_total: warnings.warn('Some rows have not been selected') return tuple(l) def bin_split(col, bins): """ desc: Splits a DataMatrix into bins; that is, the DataMatrix is first sorted by a column, and then split into equal-size (or roughly equal-size) bins. arguments: col: desc: The column to split by. type: BaseColumn bins: desc: The number of bins. type: int returns: desc: A generator that iterators over the splits. example: | # Get the mean response time for 10 bins for dm_ in op.split(dm.response_time, bins=10): print(dm_.response_time.mean) """ if len(col) < bins: raise ValueError('More bins than rows') dm = sort(col._datamatrix, by=col) for i in range(bins): start = int(len(dm)/bins*i) end = int(1.*len(dm)/bins*(i+1)) yield dm[start:end] def fullfactorial(dm, ignore=u''): """ desc: | *Requires numpy* Creates a new DataMatrix that uses a specified DataMatrix as the base of a full-factorial design. That is, each value of every row is combined with each value from every other row. For example: A B --- x 3 y 4 >>> fullfactorial(dm) A B --- x 3 x 4 y 3 y 4 arguments: dm: desc: The source DataMatrix. type: DataMatrix keywords: ignore: A value that should be ignored. return: type: DataMatrix """ for colname, col in dm.columns: if not isinstance(col, MixedColumn): raise ValueError(u'fullfactorial only works with MixedColumns') design = [len(col != ignore) for name, col in dm.columns] a = _fullfact(design) fdm = DataMatrix(a.shape[0]) for name in dm.column_names: fdm[name] = u'' for i in range(a.shape[0]): row = a[i] for rownr, name in enumerate(dm.column_names): fdm[name][i] = dm[name][int(row[rownr])] return fdm def group(dm, by=None): """ desc: | *Requires numpy* Groups the DataMatrix by unique values in a set of grouping columns. Grouped columns are stored as SeriesColumns. The columns that are grouped should contain numeric values. For example: A B --- x 0 x 1 y 2 y 3 >>> group(dm, by=[dm.a]) Gives: A B --- x [0, 1] y [2, 3] arguments: dm: desc: The DataMatrix to group. type: DataMatrix keywords: by: A list of columns to group by. type: [list, None] returns: desc: A grouped DataMatrix. type: DataMatrix """ import numpy as np bycol = MixedColumn(datamatrix=dm) if by is not None: for col in by: if col._datamatrix is not dm: raise ValueError(u'By-columns are from a different DataMatrix') bycol += col keys = bycol.unique groupcols = [(name, col) for name, col in dm.columns if col not in by] nogroupcols = [(name, col) for name, col in dm.columns if col in by] cm = DataMatrix(length=len(keys)) for name, col in groupcols: if isinstance(col, _SeriesColumn): warnings.warn( u'Failed to create series for SeriesColumn s%s' % name) continue cm[name] = SeriesColumn(depth=0) for name, col in nogroupcols: cm[name] = col.__class__ for i, key in enumerate(keys): dm_ = bycol == key for name, col in groupcols: if isinstance(col, _SeriesColumn): continue if cm[name].depth < len(dm_[name]): cm[name].defaultnan = True cm[name].depth = len(dm_[name]) cm[name].defaultnan = False try: cm[name][i,:len(dm_[name])] = dm_[name] except ValueError: warnings.warn( u'Failed to create series for MixedColumn %s' % name) for name, col in nogroupcols: cm[name][i] = dm_[name][0] return cm def sort(obj, by=None): """ desc: Sorts a column or DataMatrix. In the case of a DataMatrix, a column must be specified to determine the sort order. In the case of a column, this needs to be specified if the column should be sorted by another column. arguments: obj: type: [DataMatrix, BaseColumn] by: desc: The sort key, that is, the column that is used for sorting the DataMatrix, or the other column. type: BaseColumn returns: desc: The sorted DataMatrix, or the sorted column. type: [DataMatrix, BaseColumn] """ if isinstance(obj, DataMatrix): if by is None: raise ValueError( 'The by keyword is required when sorting a DataMatrix') return obj._selectrowid(by._sortedrowid()) if by is None: by = obj col = obj._getrowidkey(by._sortedrowid()) col._rowid = obj._rowid return col def shuffle(obj): """ desc: Shuffles a DataMatrix or a column. If a DataMatrix is shuffle, the order of the rows is shuffled, but values that were in the same row will stay in the same row. arguments: obj: type: [DataMatrix, BaseColumn] returns: desc: The shuffled DataMatrix or column. type: [DataMatrix, BaseColumn] """ _rowid = list(obj._rowid) random.shuffle(_rowid) if isinstance(obj, DataMatrix): return obj._selectrowid(_rowid) col = obj._getrowidkey(_rowid) col._rowid = obj._rowid return col def shuffle_horiz(*obj): """ desc: Shuffles a DataMatrix, or several columns from a DataMatrix, horizontally. That is, the values are shuffled between columns from the same row. argument-list: desc: A list of BaseColumns, or a single DataMatrix. returns: desc: The shuffled DataMatrix. type: DataMatrix example: | dm = DataMatrix(length=2) dm.col1 = 'a', 'b' dm.col2 = 1, 2 dm.col3 = '-' # Shuffle all columns dm_shuffle = operations.shuffle_horiz(dm) print(dm_shuffle) # Shuffle only col1 and col2 dm_shuffle = operations.shuffle_horiz(dm.col1, dm.col2) print(dm_shuffle) """ if len(obj) == 1 and isinstance(obj[0], DataMatrix): obj = [column for colname, column in obj[0].columns] try: assert(len(obj) > 1) for column in obj: assert(isinstance(column, BaseColumn)) dm = obj[0]._datamatrix for column in obj: assert(dm == column._datamatrix) except AssertionError: raise ValueError( u'Expecting a DataMatrix or multiple BaseColumns from the same DataMatrix') dm = dm[:] dm_shuffle = dm[:] keep_only(dm_shuffle, obj) for row in dm_shuffle: random.shuffle(row) for colname, column in dm_shuffle.columns: dm._cols[colname] = column dm._mutate() return dm def keep_only(dm, cols=[]): """ desc: | Removes all columns from the DataMatrix, except those listed in `cols`. *Note:* This modifies the DataMatrix in place. arguments: dm: type: DataMatrix keywords: cols: desc: A list of column names, or columns. type: list """ colnames = [] for col in cols: if isinstance(col, basestring): colnames.append(col) continue if isinstance(col, BaseColumn): colnames.append(col.name) continue raise ValueError(u'Expecting column names or BaseColumn objects') for colname in dm.column_names: if colname not in colnames: del dm[colname] def auto_type(dm): """ desc: | Converts all columns of type MixedColumn to IntColumn if all values are integer numbers, or FloatColumn if all values are non-integer numbes. *Note:* This modifies the DataMatrix in place. arguments: dm: type: DataMatrix """ for name, col in dm.columns: if isinstance(col, (FloatColumn, IntColumn)): continue col_type = IntColumn for val in col: try: assert(int(val) == float(val)) except: try: float(val) col_type = FloatColumn except: break else: new_col = col_type(col._datamatrix) new_col[:] = col del dm[name] dm[name] = new_col dm._mutate() # Private function def _fullfact(levels): """ desc: Taken from pydoe. See: """ import numpy as np n = len(levels) # number of factors nb_lines = np.prod(levels) # number of trial conditions H = np.zeros((nb_lines, n)) level_repeat = 1 range_repeat = np.prod(levels) for i in range(n): range_repeat /= levels[i] lvl = [] for j in range(levels[i]): lvl += [j]*level_repeat rng = lvl*range_repeat level_repeat *= levels[i] H[:, i] = rng return H PKq|HY%LRRdatamatrix/__init__.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * import datamatrix.monkeypatch from datamatrix._datamatrix._row import Row from datamatrix._datamatrix._mixedcolumn import MixedColumn from datamatrix._datamatrix._numericcolumn import FloatColumn, IntColumn from datamatrix._datamatrix._seriescolumn import SeriesColumn from datamatrix._datamatrix._datamatrix import DataMatrix from datamatrix._cache import cached, iscached __version__ = '0.2.0' PK jH-a a datamatrix/rbridge/lme4.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ import os import time import subprocess from datamatrix import io, series, SeriesColumn, DataMatrix, cached from datamatrix._datamatrix._seriescolumn import _SeriesColumn from datamatrix.py3compat import * @cached def lmer(dm, formula): cmd = u''' library(lmerTest) result <- lmer(%s) s = summary(result) s; write.csv(s$coef, ".r-out.csv") ''' % formula rm = _launchr(dm, cmd) rm.rename(u'', u'effect') rm.rename(u'Estimate', u'est') rm.rename(u'Std. Error', u'se') rm.rename(u't value', u't') if u'Pr(>|t|)' in rm: rm.rename(u'Pr(>|t|)', u'p') else: rm.p = -1 return rm @cached def glmer(dm, formula, family): cmd = u''' library(lme4) result <- glmer(%s, family="%s") s = summary(result) s; write.csv(s$coef, ".r-out.csv") ''' % (formula, family) rm = _launchr(dm, cmd) rm.rename(u'', u'effect') rm.rename(u'Estimate', u'est') rm.rename(u'Std. Error', u'se') rm.rename(u'z value', u'z') if u'Pr(>|z|)' in rm: rm.rename(u'Pr(>|z|)', u'p') else: rm.p = -1 return rm @cached def lmer_series(dm, formula, winlen=1): col = formula.split()[0] depth = dm[col].depth rm = None for i in range(0, depth, winlen): wm = dm[:] wm[col] = series.reduce_( series.window(wm[col], start=i, end=i+winlen)) lm = lmer(wm, formula) print('Sample %d' % i) print(lm) if rm is None: rm = DataMatrix(length=len(lm)) rm.effect = list(lm.effect) rm.p = SeriesColumn(depth=depth) rm.t = SeriesColumn(depth=depth) rm.est = SeriesColumn(depth=depth) rm.se = SeriesColumn(depth=depth) for lmrow, rmrow in zip(lm, rm): rmrow.p[i:i+winlen] = lmrow.p rmrow.t[i:i+winlen] = lmrow.t rmrow.est[i:i+winlen] = lmrow.est rmrow.se[i:i+winlen] = lmrow.se return rm def _launchr(dm, cmd): dm = dm[:] # SeriesColumns cannot be saved to a csv file, so we delete those first. for name, col in dm.columns: if isinstance(col, _SeriesColumn): del dm[name] # Write the data to an input file io.writetxt(dm, u'.r-in.csv') # Launch R, read the data, and communicate the commands proc = subprocess.Popen( ['R', '--vanilla'], stdin=subprocess.PIPE) # proc = subprocess.Popen( ['R', '--vanilla'], stdin=subprocess.PIPE, # stdout=subprocess.PIPE, stderr=subprocess.PIPE) cmd = u'data <- read.csv(".r-in.csv")\nattach(data)\n%s' % cmd proc.communicate(safe_encode(cmd, u'ascii')) # Wait until the output file has been generated and return it while not os.path.exists(u'.r-out.csv'): time.sleep(.5) dm = io.readtxt(u'.r-out.csv') return dm PK'f3H_RRdatamatrix/rbridge/__init__.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * import datamatrix.monkeypatch from datamatrix._datamatrix._row import Row from datamatrix._datamatrix._mixedcolumn import MixedColumn from datamatrix._datamatrix._numericcolumn import FloatColumn, IntColumn from datamatrix._datamatrix._seriescolumn import SeriesColumn from datamatrix._datamatrix._datamatrix import DataMatrix from datamatrix._cache import cached, iscached __version__ = '0.1.0' PKacHH (datamatrix/_datamatrix/_numericcolumn.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix._datamatrix._basecolumn import BaseColumn import operator try: import numpy as np from scipy.stats import nanmean, nanmedian, nanstd nan = np.nan except ImportError: np = None nan = None class NumericColumn(BaseColumn): """ desc: A base class for FloatColumn and IntColumn. Don't use this class directly. """ dtype = float invalid = nan def __init__(self, datamatrix): if np is None: raise Exception(u'NumPy and SciPy are required, but not installed.') super(NumericColumn, self).__init__(datamatrix) @property def unique(self): return np.unique(self._seq) @property def mean(self): return nanmean(self._seq) @property def median(self): return nanmedian(self._seq) @property def std(self): return nanstd(self._seq) @property def max(self): if not len(self._seq): return np.nan return np.nanmax(self._seq) @property def min(self): if not len(self._seq): return np.nan return np.nanmin(self._seq) @property def sum(self): if not len(self._seq): return np.nan return np.nansum(self._seq) def _printable_list(self): return list(self._seq) def _init_rowid(self): self._rowid = np.array(self._datamatrix._rowid, dtype=int) def _init_seq(self): self._seq = np.empty(len(self._datamatrix), dtype=self.dtype) self._seq[:] = self.invalid def _checktype(self, value): try: return float(value) except: return np.nan def _tosequence(self, value, length): if isinstance(value, basestring): a = np.empty(length, dtype=self.dtype) a[:] = np.nan return a return super(NumericColumn, self)._tosequence(value, length) def _compare(self, other, op): i = np.where(op(self._seq, other))[0] return self._datamatrix._selectrowid(list(self._rowid[i])) def _operate(self, other, number_op, str_op=None): col = self._empty_col() col._rowid = self._rowid col._seq = number_op(self._seq, other) return col def _addrowid(self, _rowid): old_length = len(self) self._rowid = np.concatenate((self._rowid, _rowid)) a = np.empty(len(self._rowid), dtype=self.dtype) a[:old_length] = self._seq a[old_length:] = self.invalid self._seq = a def _getrowidkey(self, key): # We need to select all rows that match the rowids specified in key, # while preserving the order provided by key. To do this, we use the # following logic: # - Get a list of indices (`orig_indices`) that give a sorted view on # self._rowid. # - Use this to search through a sorted view of _rowid for all items in # key # - Map the matching indices, which refer to the sorted view of _rowid # back to a list of indices in the original, non-sorted array. # See also: http://stackoverflow.com/questions/9566592/\ # find-multiple-values-within-a-numpy-array col = self._empty_col() orig_indices = self._rowid.argsort() matching_indices = np.searchsorted(self._rowid[orig_indices], key) selected_indices = orig_indices[matching_indices] col._rowid = self._rowid[selected_indices] col._seq = self._seq[selected_indices] return col def _sortedrowid(self): return list(self._rowid[self._seq.argsort()]) def _merge(self, other, _rowid): col = self._empty_col() i_other = ~np.in1d(other._rowid, self._rowid) \ & np.in1d(other._rowid, _rowid) i_self = np.in1d(self._rowid, _rowid) col._rowid = np.concatenate( (self._rowid[i_self], other._rowid[i_other])) col._seq = np.concatenate((self._seq[i_self], other._seq[i_other])) return col._getrowidkey(_rowid) class FloatColumn(NumericColumn): """ desc: A column of numeric float values. Invalid values are marked as numpy.nan. """ pass class IntColumn(NumericColumn): """ desc: A column of numeric int values. Does not support invalid values. """ dtype = int invalid = 0 def _tosequence(self, value, length): if not isinstance(value, basestring): try: value = list(value) except: pass else: return super(NumericColumn, self)._tosequence(value, length) try: value = int(value) except: raise TypeError(u'IntColumn expects integers!') return super(NumericColumn, self)._tosequence(value, length) def _checktype(self, value): try: return int(value) except: raise TypeError(u'IntColumn expects integers!') def _operate(self, other, number_op, str_op=None): col = super(IntColumn, self)._operate(other, number_op, str_op=None) col._seq = col._seq.astype(self.dtype) return col def __div__(self, other): return self._operate(other, operator.floordiv) def __truediv__(self, other): return self._operate(other, operator.floordiv) PKC~]Hz0'datamatrix/_datamatrix/_seriescolumn.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * # from datamatrix._datamatrix._basecolumn import BaseColumn from datamatrix._datamatrix._numericcolumn import NumericColumn try: import numpy as np from scipy.stats import nanmean, nanmedian, nanstd except ImportError: np = None class _SeriesColumn(NumericColumn): """ desc: A column in which each cell is a numeric series. """ dtype = float def __init__(self, datamatrix, depth, defaultnan=False): """ desc: Constructor. You generally don't call this constructor correctly, but use the SeriesColumn helper function. arguments: datamatrix: desc: The DataMatrix to which this column belongs. type: DataMatrix depth: desc: The depth, ie. the number of values per cell. type: int """ if np is None: raise Exception(u'NumPy and SciPy are required, but not installed.') self._depth = depth self.defaultnan = defaultnan NumericColumn.__init__(self, datamatrix) def setallrows(self, value): """ desc: Sets all rows to a value, or series of values. arguments: value: A value, or series of values that has the same length as the depth of the column. """ value = self._checktype(value) self._seq[:] = value @property def unique(self): raise NotImplementedError(u'unique is not implemented for SeriesColumn') @property def depth(self): """ name: depth desc: A property to access and change the depth of the column. """ return self._depth @depth.setter def depth(self, depth): if depth == self._depth: return if depth > self._depth: seq = np.zeros( (len(self), depth), dtype=self.dtype) if self.defaultnan: seq[:] = np.nan seq[:,:self._depth] = self._seq self._seq = seq self._depth = depth return self._depth = depth self._seq = self._seq[:,:depth] @property def plottable(self): """ name: plottable desc: Gives a view of the traces where the axes have been swapped. This is the format that matplotlib.pyplot.plot() expects. """ return np.swapaxes(self._seq, 0, 1) @property def mean(self): return nanmean(self._seq, axis=0) @property def median(self): return nanmedian(self._seq, axis=0) @property def std(self): return nanstd(self._seq, axis=0) @property def max(self): return np.nanmax(self._seq, axis=0) @property def min(self): return np.nanmin(self._seq, axis=0) @property def sum(self): return np.nansum(self._seq, axis=0) # Private functions def _init_seq(self): self._seq = np.zeros( (len(self._datamatrix), self._depth), dtype=self.dtype) if self.defaultnan: self._seq[:] = np.nan def _ellipsize(self, a): """ visible: False desc: Creates an ellipsized represenation of an array. arguments: a: An array. returns: A string with an ellipsized representation. """ return u'%s ... %s' % (str(a[:2])[:-1], str(a[-2:])[1:]) def _printable_list(self): if self._depth <= 4: return list(self._seq) return [self._ellipsize(cell) for cell in self] def _operate(self, a, number_op, str_op=None): # For a 1D array with the length of the datamatrix, we create an array # in which the second dimension (i.e. the depth) is constant. This # allows us to do by-row operations. if isinstance(a, (list, tuple)): a = np.array(a, dtype=self.dtype) if isinstance(a, NumericColumn): a = np.array(a._seq) if isinstance(a, np.ndarray) and a.shape == (len(self), ): a2 = np.empty( (len(self), self._depth), dtype=self.dtype) np.rot90(a2)[:] = a a = a2 col = self._empty_col() col._rowid = self._rowid col._seq = number_op(self._seq, a) return col def _checktype(self, value): try: a = np.empty(self._depth, dtype=self.dtype) a[:] = value except: raise Exception('Invalid type: %s' % str(value)) return a def _tosequence(self, value, length): # For float and integers, we simply create a new (length, depth) array # with only this value if isinstance(value, (float, int)): a = np.empty( (len(self._datamatrix), self._depth), dtype=self.dtype) a[:] = value return a try: a = np.array(value, dtype=self.dtype) except: raise Exception('Cannot convert to sequence: %s' % str(value)) # For a 1D array with the length of the datamatrix, we create an array # in which the second dimension (i.e. the depth) is constant. if a.shape == (length, ): a2 = np.empty( (length, self._depth), dtype=self.dtype) np.rot90(a2)[:] = a return a2 # For a 2D array that already has the correct dimensions, we return it. if a.shape == (length, self._depth): return a raise Exception('Cannot convert to sequence: %s' % str(value)) def _empty_col(self): return self.__class__(self._datamatrix, depth=self._depth) def _addrowid(self, _rowid): old_length = len(self) self._rowid = np.concatenate((self._rowid, _rowid)) a = np.zeros( (len(self._rowid), self._depth), dtype=self.dtype) a[:old_length] = self._seq self._seq = a # Implemented syntax def __getitem__(self, key): if isinstance(key, tuple) and len(key) == 2: return self._seq[key].copy() return super(_SeriesColumn, self).__getitem__(key) def __setitem__(self, key, value): if isinstance(key, tuple) and len(key) == 2: self._seq[key] = value return return super(_SeriesColumn, self).__setitem__(key, value) def SeriesColumn(depth, defaultnan=False): return _SeriesColumn, {'depth' : depth, u'defaultnan' : defaultnan} PKXG$oo&datamatrix/_datamatrix/_mixedcolumn.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix._datamatrix._basecolumn import BaseColumn class MixedColumn(BaseColumn): """ desc: A column that can contain numeric and string values. """ pass PKfhoHI}/}/%datamatrix/_datamatrix/_basecolumn.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * import collections import numbers import operator import math class BaseColumn(object): """ desc: The base class for all columns. You should not use this class directly, but rather use MixedColumn or NumericColumn. """ default_value = u'' def __init__(self, datamatrix): """ desc: Constructor. arguments: datamatrix: A DataMatrix object. """ self._datamatrix = datamatrix self._init_rowid() self._init_seq() @property def unique(self): """ name: unique desc: An interator for all unique values that occur in the column. """ return list(sorted(set(self._seq))) @property def count(self): """ name: count desc: The number of unique values that occur in the column. """ return len(self.unique) @property def mean(self): """ name: mean desc: Arithmetic mean of all values. If there are non-numeric values, these are ignored. If there are no numeric values, None or np.nan is returned. """ n = self._numbers if len(n) == 0: return None return sum(n) / len(n) @property def median(self): """ name: median desc: The median of all values. If there are non-numeric values, these are ignored. If there are no numeric values, None or np.nan is returned. """ n = sorted(self._numbers) if len(n) == 0: return None i = int(len(n)/2) if len(n) % 2 == 1: return n[i] return .5*n[i]+.5*n[i-1] @property def std(self): """ name: std desc: The standard deviation of all values. If there are non-numeric values, these are ignored. If there are 0 or 1 numeric values, None or np.nan is returned. The degrees of freedom are N-1. """ m = self.mean n = self._numbers if len(n) <= 1: return None return math.sqrt(sum((i-m)**2 for i in n)/(len(n)-1)) @property def max(self): """ name: max desc: The highest numeric value in the column, or None or np.nan if there are no numeric values. """ n = self._numbers if not len(n): return None return max(n) @property def min(self): """ name: min desc: The lowest numeric value in the column, or None or np.nan if there are no numeric values. """ n = self._numbers if not len(n): return None return min(n) @property def sum(self): """ name: sum desc: The sum of all values in the column, or None or np.nan if there are no numeric values. """ n = self._numbers if not len(n): return None return sum(n) @property def name(self): for colname, col in self._datamatrix.columns: if col is self: return colname raise NameError( u'Column not found in DataMatrix, and therefore nameless') # Private functions @property def _numbers(self): return [float(val) for val in self._seq \ if isinstance(val, numbers.Number)] def _printable_list(self): """ visible: False desc: Creates a list object for this column. The preferred syntax is list(dm), although this is slightly slower. returns: type: list """ return self._seq def _init_rowid(self): """ visible: False desc: Intializes the _rowid property, which is an iterator that contains the row ids. """ self._rowid = self._datamatrix._rowid[:] def _init_seq(self): """ visible: False desc: Initializes the _seq property, which is an iterator that contains the data. """ self._seq = [self.default_value]*len(self._datamatrix) def _addrowid(self, _rowid): """ visible: False desc: Adds an empty row with the given row id. arguments: _rowid: A row id """ self._rowid += _rowid self._seq += [self.default_value]*len(_rowid) def _checktype(self, value): """ visible: False desc: Checks wether a value has a suitable type for this column, converts it if possible, and gives an error if necessary. arguments: value: A value to check. returns: A suitably typed value. """ try: assert(int(value) == value) value = int(value) except: try: # Make sure we don't convert 'inf' and 'nan' strings to float assert(not math.isinf(float(value))) assert(not math.isnan(float(value))) value = float(value) except: pass if value is None or isinstance(value, (numbers.Number, str)): return value if isinstance(value, bytes): return safe_decode(value) raise Exception('Invalid type: %s' % value) def _merge(self, other, _rowid): """ visible: False desc: Merges this column with another column, selecting only the rows indicated by _rowid. arguments: other: Another column. _rowid: A list of row ids to select. returns: type: BaseColumn """ col = self._empty_col() col._rowid = _rowid col._seq = [] for row in _rowid: if row in self._rowid: col._seq.append(self._seq[self._rowid.index(row)]) else: col._seq.append(other._seq[other._rowid.index(row)]) return col def _tosequence(self, value, length): """ visible: False desc: Creates a sequence with a specific length from a given value (which may already be a sequence). arguments: value: The value to turn into a sequence. length: The length of the sequence. returns: A sequence, that is, some iterable object. """ if isinstance(value, (numbers.Number, basestring)): return [value]*length try: value = list(value) except: raise Exception('Cannot convert to sequence: %s' % value) if len(value) != length: raise Exception('Sequence has incorrect length: %s' % len(value)) return [self._checktype(cell) for cell in value] def _getintkey(self, key): """ visible: False desc: Gets a value by index. arguments: key: An index. returns: A value. """ return self._seq[key] def _getslicekey(self, key): """ visible: False desc: Gets a slice of this column by a slice object. arguments: key: A slice object. returns: BaseColunn """ col = self._empty_col() col._rowid = self._rowid[key] col._seq = self._seq[key] return col def _getsequencekey(self, key): """ visible: False desc: Gets a slice of this column by list or some other iterable. arguments: key: A list or other iterable object. returns: BaseColunn """ col = self._empty_col() col._rowid = [] col._seq = [] for i in key: col._rowid.append(self._rowid[i]) col._seq.append(self._seq[i]) return col def _getrowidkey(self, key): """ visible: False desc: Gets a slice of this column by a list of row ids. arguments: key: A list of row ids. returns: BaseColunn """ col = self._empty_col() col._rowid = key col._seq = [] for _rowid in key: if _rowid not in self._rowid: continue col._seq.append(self._seq[self._rowid.index(_rowid)]) return col def _sortedrowid(self): """ visible: False desc: Gives a list of rowids that are ordered such that they sort the column. returns: An iterator. """ return [rowid for val, rowid in sorted(zip(self._seq, self._rowid))] def _setintkey(self, key, value): """ visible: False desc: Sets a value by index. arguments: key: An index. value: The value to set. """ self._seq[key] = self._checktype(value) def _setslicekey(self, key, value): """ visible: False desc: Sets a range of values by a slice object. arguments: key: A slice object. value: The value to set. This can be an iterable that matches the length of the slice. """ length = len(self._seq[key]) self._seq[key] = self._tosequence(value, length) def _setsequencekey(self, key, val): """ visible: False desc: Sets a range of values by a list or other iterable. arguments: key: A list or other iterable object. val: The value to set. This can be an iterable that matches the length of the key. """ for _key, _val in zip(key, self._tosequence(val, len(key))): if _key < 0 or _key >= len(self): raise Exception('Outside of range') self._seq[_key] = _val def _compare(self, other, op): """ visible: False desc: Selects rows from this column, and returns the entire DataMatrix. arguments: other: A value to compare to. op: An operator to perform the comparison. returns: type: DataMatrix """ _rowid = [] for rowid, val in zip(self._rowid, self._seq): try: if op(val, other): _rowid.append(rowid) except: pass return self._datamatrix._selectrowid(_rowid) def _operate(self, other, number_op, str_op=None): """ visible: False desc: Performs an operation on the entire column. arguments: other: The value to use for the operation, e.g. a number to multiply with. number_op: The operator to use for numeric values. keywords: str_op: The operator to use for string values, or None to leave strings untouched. returns: A modified column. """ col = self._empty_col() col._rowid = self._rowid col._seq = [] for i, (_other, val) in enumerate( zip(self._tosequence(other, len(self)), self._seq)): if isinstance(val, numbers.Number) \ and isinstance(_other, numbers.Number): col._seq.append(number_op(self._seq[i], _other)) elif str_op is not None: col._seq.append(str_op(safe_decode(self._seq[i]), safe_decode(_other))) else: col._seq.append(self._seq[i]) return col def _empty_col(self): """ visible: False desc: Create an empty column of the same type as the current column. returns: BaseColumn """ return self.__class__(self._datamatrix) # Implemented syntax def __str__(self): return u'col%s' % str(self._seq) def __len__(self): return len(self._seq) def __getitem__(self, key): if isinstance(key, int): return self._getintkey(key) if isinstance(key, slice): return self._getslicekey(key) if isinstance(key, collections.Sequence): return self._getsequencekey(key) raise Exception(u'Invalid key') def __setitem__(self, key, value): if isinstance(key, int): self._setintkey(key, value) elif isinstance(key, slice): self._setslicekey(key, value) elif isinstance(key, collections.Sequence): self._setsequencekey(key, value) else: raise Exception('Invalid assignment') self._datamatrix._mutate() def __gt__(self, other): return self._compare(other, operator.gt) def __ge__(self, other): return self._compare(other, operator.ge) def __lt__(self, other): return self._compare(other, operator.lt) def __le__(self, other): return self._compare(other, operator.le) def __eq__(self, other): if isinstance(other, BaseColumn): return self is other return self._compare(other, operator.eq) def __ne__(self, other): if isinstance(other, BaseColumn): return self is not other return self._compare(other, operator.ne) def __add__(self, other): return self._operate(other, operator.add, operator.concat) def __sub__(self, other): return self._operate(other, operator.sub) def __mul__(self, other): return self._operate(other, operator.mul) def __div__(self, other): return self._operate(other, operator.truediv) def __truediv__(self, other): return self._operate(other, operator.truediv) def __floordiv__(self, other): return self._operate(other, operator.floordiv) def __mod__(self, other): return self._operate(other, operator.mod) def __pow__(self, other): return self._operate(other, operator.pow) PK2~]HoW00%datamatrix/_datamatrix/_datamatrix.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix import Row from datamatrix._datamatrix._basecolumn import BaseColumn from datamatrix._datamatrix._mixedcolumn import MixedColumn import collections _id = 0 class DataMatrix(object): """ desc: A DataMatrix is a tabular data structure. """ def __init__(self, length=0, default_col_type=MixedColumn): """ desc: Constructor. keywords: length: desc: The starting length of the DataMatrix. type: int """ global _id object.__setattr__(self, u'_cols', collections.OrderedDict()) object.__setattr__(self, u'_rowid', list(range(length))) object.__setattr__(self, u'_default_col_type', default_col_type) object.__setattr__(self, u'_id', _id) _id += 1 @property def columns(self): return list(self._cols.items()) @property def column_names(self): return list(self._cols.keys()) @property def rows(self): return list(range(len(self))) @property def length(self): return len(self._rowid) @property def default_col_type(self): return self._default_col_type @property def is_2d(self): for name, col in self.columns: if hasattr(col, u'depth'): return False return True def rename(self, old, new): """ desc: Renames a column. Modifies the DataMatrix in place. raises: ValueError: When an error occurs. arguments: old: The old name. new: The new name. """ if old == new: return if old not in self._cols: raise ValueError(u'Column name does not exist') if new in self._cols: raise ValueError(u'Column name already exists') try: exec(u'%s = None' % new) except SyntaxError: raise ValueError(u'Invalid column name') # A rename recipe that preservers order. _cols = collections.OrderedDict([(new, v) if k == old else (k, v) \ for k, v in self._cols.items()]) object.__setattr__(self, u'_cols', _cols) self._mutate() # Private functions. These can also be called by the BaseColumn (and # derived) classes. def _fromdict(self, d={}): """ visible: False desc: Merges a dict into the DataMatrix. Modifies the DataMatrix in place. keywords: d: The dict to merge. returns: The modified DataMatrix. """ for name, col in d.items(): if len(col) > len(self): self.length = len(col) self[name] = self._default_col_type self[name][:len(col)] = col return self def _selectrowid(self, _rowid): """ visible: False desc: Selects rows from the current DataMatrix by row id (i.e. not by index). arguments: _rowid: An iterable list of row ids. returns: type: DataMatrix. """ dm = DataMatrix(len(_rowid)) object.__setattr__(dm, u'_rowid', _rowid) object.__setattr__(dm, u'_id', self._id) for name, col in self._cols.items(): dm._cols[name] = self._cols[name]._getrowidkey(_rowid) dm._cols[name]._datamatrix = dm return dm def _slice(self, key): """ visible: False desc: Selects rows from the current DataMatrix by indices (i.e. not by row id). arguments: key: A slice object, or a list of indices. returns: type: DataMatrix. """ if isinstance(key, slice): _rowid = self._rowid[key] else: try: _rowid = [self._rowid[row] for row in key] except: raise Exception('Invalid row indices') dm = DataMatrix(len(_rowid)) object.__setattr__(dm, u'_rowid', _rowid) object.__setattr__(dm, u'_id', self._id) for name, col in self._cols.items(): dm._cols[name] = self._cols[name][key] dm._cols[name]._datamatrix = dm return dm def _setlength(self, value): """ visible: False desc: | Changes the length of the current DataMatrix, adding or removing rows as necessary. *This modifies the current DataMatrix.* __Note__: The preferred way to change the length is by setting the length property: ~~~ dm.length = 10 ~~~ arguments: value: desc: The new length. type: int """ if value < len(self): object.__setattr__(self, u'_rowid', self._rowid[:value]) for name, col in self._cols.items(): self._cols[name] = self._cols[name][:value] else: if len(self) == 0: startid = 0 else: startid = max(self._rowid)+1 rowid = [rowid+startid for rowid in range(value-len(self))] object.__setattr__(self, u'_rowid', self._rowid+rowid) for name in self._cols: self._cols[name]._addrowid(rowid) self._mutate() def _set_default_col_type(self, col_type): """ visible: False desc: Sets the default column type. arguments: col_type: A column type (BaseColumn) """ if not isinstance(col_type, type) or not issubclass(col_type, BaseColumn): raise Exception(u'Not a valid column type') object.__setattr__(self, u'_default_col_type', col_type) def _merge(self, other, _rowid): """ visible: False desc: Merges the current DataMatrix with another DataMatrix, preserving only the rows indicated by _rowid. arguments: other: Another DataMatrix. _rowid: A list of row ids. returns: type: DataMatrix. """ if self != other: raise Exception('Can only merge related datamatrices') dm = DataMatrix(len(_rowid)) object.__setattr__(dm, u'_rowid', _rowid) object.__setattr__(dm, u'_id', self._id) for name, col in self._cols.items(): dm._cols[name] = self._cols[name]._merge(other._cols[name], _rowid) dm._cols[name]._datamatrix = dm return dm def _mergedict(self, d={}): """ visible: False desc: | Merges a dict into the DataMatrix. *This modifies the current DataMatrix.* keywords: d: desc: A dictionary, where each each key is a column name, and each value is a sequence of column values, or a single column value. type: dict returns: desc: The current DataMatrix. """ for name, col in d.items(): if isinstance(col, basestring): self.length = 1 elif len(col) > len(self): self.length = len(col) self[name] = self._default_col_type self[name][:len(col)] = col return self def _mutate(self): """ visible: False desc: Changes the id of the current DataMatrix. This is done whenever the DataMatrix has been modified. """ global _id object.__setattr__(self, u'_id', self._id) _id += 1 def _getcolbyobject(self, key): """ visible: False desc: Retrieves a column by object; that is, just return the object itself. arguments: key: type: BaseColumn returns: type: BaseColumn """ for col in self._cols.values(): if col is key: return col raise Exception('Column not found') def _getcolbyname(self, key): """ visible: False desc: Retrieves a column by name. arguments: key: type: str returns: type: BaseColumn """ for name, col in self._cols.items(): if name == key: return col raise Exception('Column not found') def _getrow(self, key): """ visible: False desc: Retrieves a row by key. arguments: key: type: A key that a Row object understands. returns: type: Row """ return Row(self, key) def _check_name(self, name): """ visible: False desc: Checks whether a name is a valid column name. raises: ValueError: If name is not valid. arguments: name: The name to check. """ try: exec('%s = None' % name) except SyntaxError: raise ValueError(u'Invalid column name: %s' % name) # Implemented syntax def __contains__(self, item): return item in self._cols.keys() def __len__(self): return len(self._rowid) def __eq__(self, other): return isinstance(other, DataMatrix) and other._id == self._id def __ne__(self, other): return not isinstance(other, DataMatrix) or other._id != self._id def __and__(self, other): selection = set(self._rowid) & set(other._rowid) return self._merge(other, sorted(selection)) def __or__(self, other): selection = set(self._rowid) | set(other._rowid) return self._merge(other, sorted(selection)) def __xor__(self, other): selection = set(self._rowid) ^ set(other._rowid) return self._merge(other, sorted(selection)) def __delattr__(self, name): if name not in self._cols: raise AttributeError(u'No column named %s' % name) del self._cols[name] def __setattr__(self, name, value): # self._check_name(name) if name == u'length': self._setlength(value) return if name == u'default_col_type': self._set_default_col_type(value) return # Create a new column by type if isinstance(value, type) and issubclass(value, BaseColumn): self._cols[name] = value(self) return # Create a new column by type, kwdict tuple if isinstance(value, tuple) and len(value) == 2 \ and isinstance(value[0], type) and issubclass(value[0], BaseColumn): cls, kwdict = value self._cols[name] = cls(self, **kwdict) return # Create new column by existing column if isinstance(value, BaseColumn): if value._datamatrix is not self: raise Exception( u'This column does not belong to this DataMatrix') self._cols[name] = value return if name not in self: self._cols[name] = self._default_col_type(self) self._cols[name][:] = value self._mutate() def __delitem__(self, value): # Delete column by object if isinstance(value, BaseColumn): for name, col in self._cols.items(): if col is value: del self._cols[name] return else: raise ValueError('Column not found: %s' % value) # Delete column by name if isinstance(value, basestring): if value in self._cols: del self._cols[value] return raise ValueError('Column not found: %s' % value) # Delete row by index. The trick is to first get the slice that we want # to delete, and then xor this with the current DataMatrix. if isinstance(value, int): value = value, _slice = self[value] ^ self object.__setattr__(self, u'_cols', _slice._cols) object.__setattr__(self, u'_rowid', _slice._rowid) def __setitem__(self, name, value): self.__setattr__(name, value) def __getattr__(self, name): if name in ('__getstate__', '_cols'): raise AttributeError() if name in self._cols: return self._cols[name] raise AttributeError() def __getitem__(self, key): if isinstance(key, BaseColumn): return self._getcolbyobject(key) if isinstance(key, basestring): return self._getcolbyname(key) if isinstance(key, int): return self._getrow(key) if isinstance(key, slice) or isinstance(key, collections.Sequence): return self._slice(key) raise Exception('Cannot get %s' % key) def __str__(self): if len(self) > 20: return str(self[:20]) + u'\n(+ %d rows not shown)' % (len(self)-20) import prettytable t = prettytable.PrettyTable() t.add_column('#', self._rowid) for name, col in list(self._cols.items())[:6]: t.add_column(name, col._printable_list()) if len(self._cols) > 6: return str(t) + u'\n(+ %d columns not shown)' % (len(self._cols)-5) return str(t) def __lshift__(self, other): if isinstance(other, dict): other = DataMatrix()._fromdict(other) dm = DataMatrix(len(self)+len(other)) for name, col in self._cols.items(): if hasattr(col, 'depth'): dm[name] = col.__class__(dm, col.depth, col.defaultnan) else: dm[name] = col.__class__ dm[name][:len(self)] = self[name] dm[name]._datamatrix = dm for name, col in other._cols.items(): if name not in dm._cols: if hasattr(col, 'depth'): dm[name] = col.__class__(dm, col.depth, col.defaultnan) else: dm[name] = col.__class__ elif hasattr(col, 'depth'): dm[name].depth = max(col.depth, dm[name].depth) other[name].depth = max(col.depth, dm[name].depth) dm[name][len(self):] = other[name] dm[name]._datamatrix = dm return dm def __iter__(self): for i in self.rows: yield self[i] PKOt|H\**datamatrix/_datamatrix/_row.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * class Row(object): """ desc: A single row from a DataMatrix. """ def __init__(self, datamatrix, index): """ desc: Constructor. arguments: datamatrix: A DataMatrix object. index: The row index. """ object.__setattr__(self, u'_datamatrix', datamatrix) object.__setattr__(self, u'_index', index) def __len__(self): return len(self._datamatrix.columns) def __getattr__(self, key): return self._datamatrix[key][self._index] def __getitem__(self, key): if isinstance(key, int): key = self._datamatrix.column_names[key] return self._datamatrix[key][self._index] def __setattr__(self, key, value): self._datamatrix[key][self._index] = value def __setitem__(self, key, value): if isinstance(key, int): key = self._datamatrix.column_names[key] self._datamatrix[key][self._index] = value def __str__(self): import prettytable t = prettytable.PrettyTable(["Name", "Value"]) for name, col in self._datamatrix.columns: t.add_row([name, self[name]]) return str(t) def __iter__(self): for col in self._datamatrix.column_names: yield col, self[col] PKb#HΆaF"datamatrix/_datamatrix/__init__.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ PK$H\ādatamatrix/colors/tango.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * butter = ['#fce94f', '#edd400', '#c4a000'] orange = ['#fcaf3e', '#f57900', '#ce5c00'] chocolate = ['#e9b96e', '#c17d11', '#8f5902'] chameleon = ['#8ae234', '#73d216', '#4e9a06'] skyblue = ['#729fcf', '#3465a4', '#204a87'] plum = ['#ad7fa8', '#75507b', '#5c3566'] scarletred = ['#ef2929', '#cc0000', '#a40000'] aluminium = ['#eeeeec', '#d3d7cf', '#babdb6', '#888a85', '#555753', '#2e3436'] yellow = butter brown = chocolate green = chameleon blue = skyblue purple = plum red = scarletred grey = aluminium gray = aluminium allcolors = butter + orange + chocolate + chameleon + skyblue + plum + \ scarletred brightcolors = [butter[0], orange[0], chameleon[0], skyblue[0], plum[0], \ scarletred[0]] PKZy$HΆaFdatamatrix/colors/__init__.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ PKchH. """ from datamatrix.py3compat import * from datamatrix import DataMatrix, MixedColumn import os import csv import collections import warnings def readtxt(path, delimiter=',', quotechar='"', default_col_type=MixedColumn): """ desc: Reads a DataMatrix from a csv file. arguments: path: The path to the pickle file. keywords: delimiter: The delimiter characer. quotechar: The quote character. default_col_type: The default column type. returns: A DataMatrix. """ d = collections.OrderedDict() with open(path) as csvfile: reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar) for column in next(reader): d[column] = [] for row in reader: all_columns = list(d.keys()) for column, val in zip(d.keys(), row): all_columns.remove(column) d[column].append(val) for column in all_columns: warnings.warn(u'Some rows miss column %s' % column) d[column].append(u'') dm = DataMatrix(default_col_type=default_col_type)._fromdict(d) return dm def writetxt(dm, path, delimiter=',', quotechar='"'): """ desc: Writes a DataMatrix to a csv file. arguments: dm: The DataMatrix to write. path: The path to the pickle file. keywords: delimiter: The delimiter characer. quotechar: The quote character. """ if not dm.is_2d: raise TypeError('Can only write 2D DataMatrix objects to csv') try: os.makedirs(os.path.dirname(path)) except: pass with open(path, 'w') as csvfile: writer = csv.writer(csvfile, delimiter=delimiter, quotechar=quotechar) writer.writerow([safe_str(colname) for colname in dm.column_names]) for row in dm: writer.writerow([safe_str(value) for colname, value in row]) PKchH'ٻ22datamatrix/io/_pickle.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * import pickle import os def readpickle(path): """ desc: Reads a DataMatrix from a pickle file. arguments: path: The path to the pickle file. returns: A DataMatrix. """ with open(path, 'rb') as picklefile: return pickle.load(picklefile) def writepickle(dm, path, protocol=-1): """ desc: Writes a DataMatrix to a pickle file. arguments: dm: The DataMatrix to write. path: The path to the pickle file. keywords: protocol: The pickle protocol. """ try: os.makedirs(os.path.dirname(path)) except: pass with open(path, 'wb') as picklefile: pickle.dump(dm, picklefile, protocol) PK7LQHQm1ffdatamatrix/io/__init__.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix.io._text import readtxt, writetxt from datamatrix.io._pickle import readpickle, writepickle from datamatrix.io._excel import readxlsx, writexlsx PKchH vvdatamatrix/io/_excel.py# -*- coding: utf-8 -*- """ This file is part of datamatrix. datamatrix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. datamatrix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with datamatrix. If not, see . """ from datamatrix.py3compat import * from datamatrix import DataMatrix, MixedColumn import warnings import os def readxlsx(path, default_col_type=MixedColumn): """ desc: Reads a DataMatrix from an Excel 2010 xlsx file. arguments: path: The path to the xlsx file. keywords: default_col_type: The default column type. returns: A DataMatrix. """ from openpyxl import load_workbook wb = load_workbook(path) rows = list(wb.active.rows) dm = DataMatrix(default_col_type=default_col_type, length=len(rows)-1) for cell in rows.pop(0): if cell.value is None: raise ValueError(u'Not all columns have a name on the first row') dm[cell.value] = default_col_type for i, row in enumerate(rows): for colname, cell in zip(dm.column_names, rows[i]): if cell.value is None: dm[colname][i] = default_col_type.default_value warnings.warn(u'Some rows miss column %s' % colname) else: dm[colname][i] = cell.value return dm def writexlsx(dm, path): """ desc: Writes a DataMatrix to an Excel 2010 xlsx file. arguments: dm: The DataMatrix to write. path: The path to the xlsx file. """ from openpyxl import Workbook try: os.makedirs(os.path.dirname(path)) except: pass wb = Workbook() ws = wb.active for colnr, colname in enumerate(dm.column_names): ws[chr(ord('A')+colnr)+'1'] = colname for rownr, row in enumerate(dm): for colnr, (colname, value) in enumerate(row): ws[chr(ord('A')+colnr)+str(rownr+2)] = value wb.save(path) PK,|H^- 1python_datamatrix-0.2.0.dist-info/DESCRIPTION.rstUNKNOWN PK,|HQr/python_datamatrix-0.2.0.dist-info/metadata.json{"classifiers": ["Development Status :: 4 - Beta", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Programming Language :: Python :: 2", "Programming Language :: Python :: 3"], "extensions": {"python.details": {"contacts": [{"email": "s.mathot@cogsci.nl", "name": "Sebastiaan Mathot", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "https://github.com/smathot/python-datamatrix"}}}, "generator": "bdist_wheel (0.26.0)", "license": "GNU GPL Version 3", "metadata_version": "2.0", "name": "python-datamatrix", "summary": "An intuitive, Pythonic way to work with tabular data", "version": "0.2.0"}PK,|Hi<9 /python_datamatrix-0.2.0.dist-info/top_level.txtdatamatrix PK,|Hndnn'python_datamatrix-0.2.0.dist-info/WHEELWheel-Version: 1.0 Generator: bdist_wheel (0.26.0) Root-Is-Purelib: true Tag: py2-none-any Tag: py3-none-any PK,|H@]\\*python_datamatrix-0.2.0.dist-info/METADATAMetadata-Version: 2.0 Name: python-datamatrix Version: 0.2.0 Summary: An intuitive, Pythonic way to work with tabular data Home-page: https://github.com/smathot/python-datamatrix Author: Sebastiaan Mathot Author-email: s.mathot@cogsci.nl License: GNU GPL Version 3 Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Intended Audience :: Science/Research Classifier: Topic :: Scientific/Engineering Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 3 UNKNOWN PK,|H;& & (python_datamatrix-0.2.0.dist-info/RECORDdatamatrix/__init__.py,sha256=geHCve9zqgsqavIMAT9---INCkBwwCDSFBqZGHnkCeY,1106 datamatrix/_cache.py,sha256=M3LYFMrwDpG3uhfpr6yNQfOsV_M1LOdrd_FDW_vybQc,3322 datamatrix/convert.py,sha256=Eq6ak5pZy0J_MpViBkPavtsfZi-cf2cJsxS8l4wzNsg,2048 datamatrix/dispatch.py,sha256=-FQJN2z3yFeaVQw-465I3-c16cPvLfJ0nDHBgvo5wfQ,4905 datamatrix/monkeypatch.py,sha256=r6KXJGBTU3j3N5-U-VQDX0psGWpFR2Ou1jcJmnOW2EQ,1244 datamatrix/operations.py,sha256=izznF-MLN5gpxA3WTlJuh-aTwgs1FOviy81d-MoSzrM,10705 datamatrix/plot.py,sha256=Onu5aQTzxBq3RBatYfmLwrG3_G_JDaPgqkdyomTxb34,4506 datamatrix/py3compat.py,sha256=AUQXcqlBal4xKcY8aRsqqrHX6RZLrNYeE3H0uOCoJLE,2300 datamatrix/series.py,sha256=46WXupZCb9kSYNecfxkwtBbcRuisOW-9Z04a7n6uoNA,10296 datamatrix/_datamatrix/__init__.py,sha256=TWkx4bMn2bkCzWopyRzcupiAhePutb5iW-XNMKoyDxs,673 datamatrix/_datamatrix/_basecolumn.py,sha256=JcPJAyjRR2TFav1oo8g5FiDty4y7xpGRDWBqA1Jz2zU,12157 datamatrix/_datamatrix/_datamatrix.py,sha256=6ZV40KgQDJTn3_s_5xZ-GJtpSz5GoN-wTDUBaQAX8qA,12470 datamatrix/_datamatrix/_mixedcolumn.py,sha256=_u_cs8uuXNeQCNDRhhTN8fXwAYaWxDqHi7vSfYKXLcc,879 datamatrix/_datamatrix/_numericcolumn.py,sha256=AtB1xMo_ASim3qhdEHTvtjmxe6xRTOa-C-DOai9XzGw,5340 datamatrix/_datamatrix/_row.py,sha256=wdDuhUFczZuj7DXMU7IssLfDykJV8Hyrx6d3FAFnILw,1834 datamatrix/_datamatrix/_seriescolumn.py,sha256=hUnB73HwX2Excp10Sd0rwaXezn2F8ZYaM21UHiVwxu0,6159 datamatrix/colors/__init__.py,sha256=TWkx4bMn2bkCzWopyRzcupiAhePutb5iW-XNMKoyDxs,673 datamatrix/colors/tango.py,sha256=nREMlZZN207yCzojN4qAWlaTz9rV80jE9nI8RBT8h8I,1409 datamatrix/io/__init__.py,sha256=TE6-ERpSRAfrC3Jyaxi8u9rmqTgq3mXnYeINZG9vgqI,870 datamatrix/io/_excel.py,sha256=sh-Hks79K2RCxuwdGfGedFFGWkxBt24ORtf1gHarJ08,2166 datamatrix/io/_pickle.py,sha256=w3VuOkiCYDgfK3NEnbuyYoYo57h5gAU6oPkIunvLiM8,1330 datamatrix/io/_text.py,sha256=wj2h7Tdlq_YiThdPNrTDJOq0sknX_3xWjcUDcIFkaoU,2364 datamatrix/rbridge/__init__.py,sha256=wVWWNqAGP4p1n4_95qdieHJ-8j78qJoT91wfbykYdLk,1106 datamatrix/rbridge/lme4.py,sha256=ZxoVqOm5nuzWl9JeBZRPOufzvnF80Ha0kWWA1QpSpHU,3169 python_datamatrix-0.2.0.dist-info/DESCRIPTION.rst,sha256=OCTuuN6LcWulhHS3d5rfjdsQtW22n7HENFRh6jC6ego,10 python_datamatrix-0.2.0.dist-info/METADATA,sha256=VTWq9oMGfS3_T3F_dhM4wTdJvV7X-X7yc_rOSg1Gw5M,604 python_datamatrix-0.2.0.dist-info/RECORD,, python_datamatrix-0.2.0.dist-info/WHEEL,sha256=GrqQvamwgBV4nLoJe0vhYRSWzWsx7xjlt74FT0SWYfE,110 python_datamatrix-0.2.0.dist-info/metadata.json,sha256=bU3oiJZXyaa42HqldRaDkRNaegaw7kLTOQ83uFtI-RE,743 python_datamatrix-0.2.0.dist-info/top_level.txt,sha256=x4GJlnqoJqlXkYEaG3Fa5L0sGEGI-KSzYwtTwirCNA4,11 PKdhHh{datamatrix/py3compat.pyPK~#Hݴw1 datamatrix/monkeypatch.pyPK\PPH))Ddatamatrix/dispatch.pyPKDRkH ˱!datamatrix/plot.pyPKSRrH 4z8(8(k3datamatrix/series.pyPK|MPH[qٞ [datamatrix/_cache.pyPKffkH4Iidatamatrix/convert.pyPKv|H0))4qdatamatrix/operations.pyPKq|HY%LRR;datamatrix/__init__.pyPK jH-a a datamatrix/rbridge/lme4.pyPK'f3H_RRZdatamatrix/rbridge/__init__.pyPKacHH (datamatrix/_datamatrix/_numericcolumn.pyPKC~]Hz0' datamatrix/_datamatrix/_seriescolumn.pyPKXG$oo&^datamatrix/_datamatrix/_mixedcolumn.pyPKfhoHI}/}/%datamatrix/_datamatrix/_basecolumn.pyPK2~]HoW00%datamatrix/_datamatrix/_datamatrix.pyPKOt|H\**Bdatamatrix/_datamatrix/_row.pyPKb#HΆaF"0Jdatamatrix/_datamatrix/__init__.pyPK$H\āMdatamatrix/colors/tango.pyPKZy$HΆaFRdatamatrix/colors/__init__.pyPKchH