#!/usr/bin/env python
# -*- coding: utf-8
"""A script to call CONCOCT clustering on a merged anvi'o profile"""

import sys

import anvio
import anvio.utils as utils
import anvio.concoct as concoct
import anvio.filesnpaths as filesnpaths

from anvio.errors import ConfigError, FilesNPathsError


__author__ = "Christopher Quince"
__copyright__ = "Copyright 2015, The anvio Project"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


def _build_parser():
    """Build the command-line parser for this script."""
    # argparse is only needed when the script is actually run, so it is
    # imported here (the original imported it inside the __main__ guard).
    import argparse

    parser = argparse.ArgumentParser(
        description="A script to call CONCOCT clustering on a merged anvi'o profile")
    parser.add_argument('-p', '--profile-db', metavar="PROFILE_DB", required=True,
                        help='Profile database.')
    parser.add_argument('-a', '--annotation-db', required=True, metavar='ANNOTATION_DB',
                        help='anvio annotation database.')
    parser.add_argument('-o', '--output-file', metavar='OUTPUT.txt', default=None,
                        help='Store results as a TAB-delimited file')
    parser.add_argument('--skip-db', default=False, action='store_true',
                        help='By default, results are stored in the profile database that is '
                             'given as a parameter. Use this flag if you would like to skip '
                             'that step.')
    parser.add_argument('--source-identifier', default='CONCOCT', metavar='SOURCE',
                        help="The source identifier when results are stored in the profile "
                             "database. The default id is '%(default)s'. If there is another "
                             "entry for '%(default)s', it will be overwritten with new "
                             "results. Using this parameter you can avoid that.")
    parser.add_argument('--debug', action='store_true', help='Print out debug info.')
    return parser


def _validate_source_identifier(source_identifier):
    """Return the stripped source identifier, raising ConfigError if it is unusable."""
    source = source_identifier.strip()
    if not source:
        raise ConfigError('Nice try. Source identifier cannot be empty')

    try:
        utils.check_sample_id(source)
    except Exception:
        # Any failure of the check is reported as a configuration problem.
        # `Exception` (rather than a bare `except:`) lets KeyboardInterrupt and
        # SystemExit propagate instead of being turned into a ConfigError.
        raise ConfigError(('"%s" is not a proper source name. A proper one should be a single '
                           'word and not contain ANY characters but digits, ASCII letters and '
                           'underscore character(s). There should not be any space characters, '
                           'and the source ID should not start with a digit.') % source)

    return source


def main(argv=None):
    """Parse the command line, run the clustering and store its results.

    `argv` is the list of command-line arguments to parse; when it is None,
    argparse falls back to `sys.argv[1:]`.

    A ConfigError is printed and the process exits with -1; a
    FilesNPathsError is printed and the process exits with -2.
    """
    # Parsing stays outside the try block: argparse reports usage errors by
    # exiting on its own.
    args = _build_parser().parse_args(argv)

    try:
        source = _validate_source_identifier(args.source_identifier)

        # make sure output file is writable before the analysis...
        if args.output_file:
            filesnpaths.is_output_file_writable(args.output_file)

        c = concoct.CONCOCT(args)
        c.cluster()

        if args.output_file:
            c.store_clusters_as_TAB_delimited_text(args.output_file)
        if not args.skip_db:
            c.store_clusters_in_db(source=source)

    # `except ... as e` and `print(e)` are valid on both Python 2.6+ and 3.
    except ConfigError as e:
        print(e)
        sys.exit(-1)
    except FilesNPathsError as e:
        print(e)
        sys.exit(-2)


if __name__ == '__main__':
    main()
