{ "info": { "author": "Florian Finkernagel", "author_email": "finkernagel@coonabibba.de", "bugtrack_url": null, "classifiers": [], "description": "An implemention of an almost R like DataFrame object. \nUsage: \n u = DataFrame( { \"Field1\": [1, 2, 3], \n \"Field2\": ['abc', 'def', 'hgi']},\n optional:\n ['Field1', 'Field2']\n [\"rowOne\", \"rowTwo\", \"thirdRow\"])\n\nA DataFrame is basically a table with rows and columns. \n\nColumns are named, rows are numbered (but can be named) and can be easily selected and calculated upon. Internally, columns are stored as 1d numpy arrays. If you set row names, they're converted into a dictionary for fast access. There is a rich subselection/slicing API, see help(DataFrame.get_item) (it also works for setting values). Please note that any slice get's you another DataFrame, to access individual entries use get_row(), get_column(), get_value(). \n\nDataFrames also understand basic arithmetic and you can either add (multiply,...) a constant value, or another DataFrame of the same size / with the same column names, like this: \n#multiply every value in ColumnA that is smaller than 5 by 6.\nmy_df[my_df[:,'ColumnA'] < 5, 'ColumnA'] *= 6\n\n#you always need to specify both row and column selectors, use : to mean everything\nmy_df[:, 'ColumnB'] = my_df[:,'ColumnA'] + my_df[:, 'ColumnC']\n\n#let's take every row that starts with Shu in ColumnA and replace it with a new list (comprehension)\nselect = my_df.where(lambda row: row['ColumnA'].startswith('Shu'))\nmy_df[select, 'ColumnA'] = [row['ColumnA'].replace('Shu', 'Sha') for row in my_df[select,:].iter_rows()]\n\nDataframes talk directly to R via rpy2 (rpy2 is not a prerequiste for the library!) \nfrom dataframe import DataFrame\nfrom rpy2 import robjects as ro\nmy_df = DataFrame({\"ColumnA\": [1,2,3], 'ColumnB': ['sha','sha','shu']})\nro.r['print'](my_df)\n\nCombine DataFrames on rows or columns:\nmy_df = a.rbind_copy(b) # a and b have the same columns\nmy_df = a.cbind_view(b) # my_df is a composite sharing numpy arrays (columns) with a and b\nmy_d = a.join_columns_on(b, 'Name_in_A', 'Name_in_B\") #join on common values\n\n#manipulate DataFrame columns\nmy_df.insert_column(\"new_column_name\", [1,2, 3])\nmy_df.drop_column('dropped_column_name')\nmy_df.drop_all_columns_except('keep_me_please', 'keep_me_as_well')\nmy_df.rename_column(\"old\",\"new\")\nprint my_df.get_column_names()\nmy_df.impose_partial_column_order(['FirstColumn','Second_Column'],['pen_ultimate_column','ultimate_column']) # set the column order. Everything between the first and the second list (unspecified columns) get's sorted alphabetically\n\n#access data\nmy_df[100, \"ColumnA\"] #a new DataFrame with one column and one row\nmy_df.get_value(100, 'ColumnA') #whatever was in in row 100, column 'ColumnA' (string, int, object...)\nmy_df.get_row(100) # -> {\"ColumnA\": value, \"ColumnB\": another_value}\nmy_df.get_row_as_list(100) # -> [value, another_value], in order of my_df.columns_ordered\nmy_df.get_column('columnA') # numpy array of the column (a copy)\nmy_df.get_column_view('columnA') # the actual underlying numpy array\n\n#iterate across the data\nmy_df.iter_rows() # iter rows as dictionarys\nmy_df.iter_rows_as_list() # iter rows as lists (see get_row_as_list())\nmy_df.iter_values_columns_first() #value by value, first column row 1, first column row 2...\nmy_df.iter_values_rows_first() #value by value, first column, row 1, second column, row 1\n\n#turn into boolean array for subselection\nmy_df.where(lambda row: row['ColumnA'].startswith(\"Hello\") and row['ColumnB'] >=5)\nmy_df[:,\"Just_one_column\"] > 5 # any comparison\n\n#sort\nsorted_df = my_df.sort_by(\"ColumnA\") # copy sorted by ColumnA ascending\nsorted_df = my_df.sort_by(\"ColumnA\", False) # copy sorted by ColumnA descending\nsorted_df = my_df.sort_by([\"ColumnA\", 'ColumnB'], [False, True]) # copy sorted by ColumnA descending, then Column B ascending \n\n#aggregation functions\nmy_df.mean('ColumnA') # - average (mean) of the values in ColumnA\nmy_df.mean_and_std('ColumnA') # - mean and standard deviation of the values in ColumnA\n\n#translate columns\nmy_df.turn_into_level('ColumnA') #turns into R compatible factor. Optional: order of levels\nmy_df.digitize_column('ColumnA') # bin the values\nmy_df.rankify_column('ColumnA', True) # turn into ranks, Ascending (0.5, 0.6, 0.55) -> 0, 2, 1\nmy_df.rescale_column_0_1('ColumnA') # rescales a column to lie within 0..1 (inclusive)\n\n\n#import and export\nmy_df = pydataframe.DF2CSV().read(\"filename\", dialect=pydataframe.TabDialect(), handle_quotes=True) #read a tab seperated value file. Lot's of options, please check the code\npydataframe.DF2CSV().write(my_df, filename, dialect=pydataframe.TabDialect()) # write a tab seperated value file\npydataframe.DF2Excel().read(filename)\npydataframe.DF2Excel().write(my_filename)\n \n\n\n", "description_content_type": "", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "http://code.google.com/p/pydataframe/", "keywords": "", "license": "BSD", "maintainer": "", "maintainer_email": "", "name": "pydataframe", "package_url": "https://pypi.org/project/pydataframe/", "platform": "", "project_url": "https://pypi.org/project/pydataframe/", "project_urls": { "Homepage": "http://code.google.com/p/pydataframe/" }, "release_url": "https://pypi.org/project/pydataframe/0.1.7/", "requires_dist": null, "requires_python": "", "summary": "A DataFrame (table like datastructure) for Python, similar to R's dataframe based on numpy arrays", "version": "0.1.7" }, "last_serial": 4036088, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "0ba7aec7dddaa3dc744b6004aeb57c5e", "sha256": "65903376d17c82c2aac02873c0497aa218285e903eecdc0787c9fb0c1f9d7243" }, "downloads": -1, "filename": "pydataframe-0.1.tar.gz", "has_sig": false, "md5_digest": "0ba7aec7dddaa3dc744b6004aeb57c5e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31927, "upload_time": "2010-05-07T13:34:29", "url": "https://files.pythonhosted.org/packages/46/9c/33cc6c2740f08c8dd24d39b0ea4ace746b06c0559e8c363607da23651f40/pydataframe-0.1.tar.gz" } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "c0ee68cdf8c144a3a2d8547a681507e1", "sha256": "d78e2431e16d78d09af95e56344efdcf3c8b10a2c91f5792dfe790f7a785c18a" }, "downloads": -1, "filename": "pydataframe-0.1.1.tar.gz", "has_sig": false, "md5_digest": "c0ee68cdf8c144a3a2d8547a681507e1", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 34382, "upload_time": "2011-01-28T15:55:45", "url": "https://files.pythonhosted.org/packages/dc/55/39a2317e6ed8d0c4c71dcbf6256544a1537a78acfe2ee9a0c2f046a3f4e6/pydataframe-0.1.1.tar.gz" } ], "0.1.2": [ { "comment_text": "", "digests": { "md5": "c823867a9ca1a476ada9227888020fd9", "sha256": "e4392a293f617e034a081e9c1b034bc5040a5115913529a0d6f539be42fcdba5" }, "downloads": -1, "filename": "pydataframe-0.1.2.tar.gz", "has_sig": false, "md5_digest": "c823867a9ca1a476ada9227888020fd9", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 37118, "upload_time": "2011-05-19T11:53:02", "url": "https://files.pythonhosted.org/packages/4e/16/05fafb78f697fe152a9e418e918fdb4d5c56ddd6894d609ddb5bd8b3a770/pydataframe-0.1.2.tar.gz" } ], "0.1.3": [ { "comment_text": "", "digests": { "md5": "0b71d33f19526bf89104638aba4328db", "sha256": "93fb0aa71ed59e40b6ca442a0a262e218ef2795fb4c34ee088240f028201136a" }, "downloads": -1, "filename": "pydataframe-0.1.3.tar.gz", "has_sig": false, "md5_digest": "0b71d33f19526bf89104638aba4328db", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 37173, "upload_time": "2011-05-19T13:02:07", "url": "https://files.pythonhosted.org/packages/c4/6d/5268fc2f058bc6c887447c04a5838e621b767cc9e426d4cc1bdbad7833d2/pydataframe-0.1.3.tar.gz" } ], "0.1.4": [ { "comment_text": "", "digests": { "md5": "6b18c314083d4df238f7b15e3f2ed666", "sha256": "51b9a2d8d95dd6729930e3e8ebd54e3a04d90f3e0a4b71061a552b2705428e50" }, "downloads": -1, "filename": "pydataframe-0.1.4.tar.gz", "has_sig": false, "md5_digest": "6b18c314083d4df238f7b15e3f2ed666", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 36009, "upload_time": "2011-07-01T13:20:01", "url": "https://files.pythonhosted.org/packages/ea/5b/726e6e17f0a7e5d4e5921696417ff3a083d3d83067b11ea9a7c69afd015c/pydataframe-0.1.4.tar.gz" } ], "0.1.5": [ { "comment_text": "", "digests": { "md5": "e52c4f52a61cddbae80d2a4c054e16ed", "sha256": "753abea9b3d4e83fb523a1d3a380aec366527665b111346670084a0af859fbda" }, "downloads": -1, "filename": "pydataframe-0.1.5.tar.gz", "has_sig": false, "md5_digest": "e52c4f52a61cddbae80d2a4c054e16ed", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 36102, "upload_time": "2012-01-30T14:58:05", "url": "https://files.pythonhosted.org/packages/32/fb/aaca7cf3d2f8f57e98edc9d1c3d3c6bd5defcb5f9fad1271e2d81abaca5b/pydataframe-0.1.5.tar.gz" } ], "0.1.6.150": [ { "comment_text": "", "digests": { "md5": "4158d5d4c8d704303e66f9e941170e13", "sha256": "fba756cb50bcb91079c4692910f64faa6494a7056d42f6fd14a627658b721616" }, "downloads": -1, "filename": "pydataframe-0.1.6.150.tar.gz", "has_sig": false, "md5_digest": "4158d5d4c8d704303e66f9e941170e13", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 36048, "upload_time": "2012-01-30T16:58:38", "url": "https://files.pythonhosted.org/packages/93/86/867d27e78b143caa7e4e7eb236c990673ddeb171bcadcda5fea5146994a1/pydataframe-0.1.6.150.tar.gz" } ], "0.1.6.175": [], "0.1.6.180": [ { "comment_text": "", "digests": { "md5": "bff4acd3a632fe0441ca3b299163fddf", "sha256": "484d34f0a3d548102efcb68904591109c3a06ed54b822fea27dd158f09700355" }, "downloads": -1, "filename": "pydataframe-0.1.6.180.tar.gz", "has_sig": false, "md5_digest": "bff4acd3a632fe0441ca3b299163fddf", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 40740, "upload_time": "2013-02-04T16:07:07", "url": "https://files.pythonhosted.org/packages/2b/d8/2e37ac8237976d711df82c25d3454fd3dbe5b9b843ad0a086441d429fa6e/pydataframe-0.1.6.180.tar.gz" } ], "0.1.7": [ { "comment_text": "", "digests": { "md5": "6c10917bc6fa68ee654728236677a38c", "sha256": "72198fd5f51c84a2537fdb6015afa077b3616a381fa1e6d545d67d394090a628" }, "downloads": -1, "filename": "pydataframe-0.1.7.tar.gz", "has_sig": false, "md5_digest": "6c10917bc6fa68ee654728236677a38c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31478, "upload_time": "2018-07-06T11:20:53", "url": "https://files.pythonhosted.org/packages/aa/e3/d51366a892acb543c79535f29ef46483ca5213f28aa73eabf5a9ace2d5b8/pydataframe-0.1.7.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "6c10917bc6fa68ee654728236677a38c", "sha256": "72198fd5f51c84a2537fdb6015afa077b3616a381fa1e6d545d67d394090a628" }, "downloads": -1, "filename": "pydataframe-0.1.7.tar.gz", "has_sig": false, "md5_digest": "6c10917bc6fa68ee654728236677a38c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31478, "upload_time": "2018-07-06T11:20:53", "url": "https://files.pythonhosted.org/packages/aa/e3/d51366a892acb543c79535f29ef46483ca5213f28aa73eabf5a9ace2d5b8/pydataframe-0.1.7.tar.gz" } ] }