import os
import time
import numpy as np
from numpy import genfromtxt

def convert(fname, overwrite=False, delimiter=","):
    '''
    Simple function to make a python file out of a delimited text file.

    This function reads the column-ordered data in a text file and writes
    it to a python file with a list for each column on a new line.
    The python file is written next to `fname`; its name is the part of
    the base filename before the first "." plus ".py".

    Parameters
    ----------
    fname : string
        The filename to convert
    overwrite : bool
        Overwrite the python file if it already exists without asking.
        Default is False, in which case the user is prompted on the
        console and nothing is written unless the answer contains "y".
    delimiter : string
        The default is ",". If the delimiter is whitespace, then
        consecutive delimiters are merged.

    Returns
    -------
    None

    Examples
    --------
    >>> from scikits.statsmodels.datasets.data_utils import convert
    >>> convert('/path/to/file.csv')

    There will now be a file /path/to/file.py that contains a list
    `names` and then a list for each name in names.

    Notes
    -----
    Uses numpy.genfromtxt
    It is currently assumed that the text file that contains the data
    has a row of headers.  All values are written as strings, and the
    header names are upper-cased in the generated file.
    '''
    #TODO: could be extended to use kwds to pass to np.genfromtxt
    whitespace_delimited = delimiter.isspace()

    # Scrape the header names by hand: the data below is read with a plain
    # string dtype and skip_header=1, so genfromtxt never sees the names.
    with open(fname) as header_file:
        header = header_file.readline()
    header = header.strip("\r\n")  # drop the line terminator (LF or CRLF)
    if whitespace_delimited:
        names = header.split(None)  # merge runs of whitespace
    else:
        names = header.split(delimiter)
    names = [name.strip("\"'") for name in names]  # unquote header fields

    # The builtin ``str`` is what the removed ``np.str`` alias pointed to.
    if whitespace_delimited:
        dataset = genfromtxt(fname, dtype=str, skip_header=1)
    else:
        dataset = genfromtxt(fname, delimiter=delimiter, dtype=str,
                             skip_header=1)
    # genfromtxt squeezes a single data row or a single column down to
    # fewer than two dimensions; restore the (rows, columns) layout so the
    # per-column slicing below works.
    if dataset.ndim < 2:
        dataset = dataset.reshape(-1, len(names))

    directory, basename = os.path.split(fname)
    new_file = os.path.join(directory, basename.split('.')[0] + '.py')

    # Only ask when the caller did not already request overwriting.
    if os.path.isfile(new_file) and not overwrite:
        try:
            read_answer = raw_input  # Python 2
        except NameError:
            read_answer = input  # Python 3
        print('Do you want to overwrite the existing file %s?' % new_file)
        answer = read_answer("[y/n] > ")
        if 'y' not in answer.lower():
            return

    timestamp = time.strftime("%a, %d %b %Y, %H:%M %Z")
    # The file is opened in text mode, which already translates "\n" to the
    # platform line ending; writing os.linesep would give "\r\r\n" on Windows.
    blank_line = "\n" * 2
    with open(new_file, 'w') as out:
        out.write('# Autogenerated by .data_utils.convert on ' + timestamp
                  + blank_line)
        out.write('names = ' + str(names).upper() + blank_line)
        for i, name in enumerate(names):
            out.write(name.upper() + ' = ' + str(dataset[:, i].tolist())
                      + blank_line)
