#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import argparse

import anvio
import anvio.tables as t
import anvio.dbops as dbops
import anvio.terminal as terminal 

from anvio.errors import ConfigError
from anvio.completeness import Completeness


# Module metadata (authorship, license, contact, status). The version string
# is not hard-coded here; it is taken from the imported anvio package.
__author__ = "A. Murat Eren"
__copyright__ = "Copyright 2015, The anvio Project"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


# Terminal helpers from anvio.terminal. `run` is used by the script body below
# for its info and warning messages; `progress` is instantiated here but is
# not referenced anywhere in the visible part of this script.
run = terminal.Run()
progress = terminal.Progress()


def read_split_names(path):
    """Return the set of split names listed in the text file at `path`.

    One name per line is expected. Surrounding whitespace is stripped from
    every line; blank lines and lines whose first non-blank character is
    '#' are ignored. Duplicate names collapse into a single entry.
    """
    # `with` guarantees the file handle is closed, even if reading fails.
    with open(path) as splits_file:
        stripped_lines = (line.strip() for line in splits_file)
        return set(name for name in stripped_lines if name and not name.startswith('#'))


if __name__ == '__main__':
    # Command line interface: two positional paths plus optional filters.
    parser = argparse.ArgumentParser(description='A script to generate completeness info for a given list of _splits_')
    parser.add_argument('splits_txt', metavar = 'SPLITS_TXT',
                        help = 'File with split names.')
    parser.add_argument('annotation_db', metavar = 'ANNOTATION_DB',
                        help = 'Annotation database to read from.')
    parser.add_argument('-e', '--min-e-value', default=1e-15, type=float, metavar = 'E-VALUE',
                        help = 'Minimum significance score of an HMM find to be considered as a valid hit.\
                                Default is %(default)g.')
    parser.add_argument('--list-sources', action='store_true', default=False,
                        help = 'Show available single-copy gene search results and exit.')
    parser.add_argument('--source', default=None,
                        help = 'Source to focus on. If none declared, all single-copy gene sources\
                                are going to be listed.')

    args = parser.parse_args()

    # Built before the --list-sources check because the list of available
    # sources is read from this object.
    completeness = Completeness(args.annotation_db, args.source)

    if args.list_sources:
        run.info('Available singlecopy sources', ', '.join(completeness.sources))
        sys.exit()

    splits_in_users_list = read_split_names(args.splits_txt)

    annotation_db = dbops.AnnotationDatabase(args.annotation_db)
    try:
        splits_in_db = set(annotation_db.db.get_table_as_dict(t.splits_info_table_name).keys())
    finally:
        # Release the database handle even if reading the table fails.
        annotation_db.disconnect()

    # Only names present both in the user's file and in the database are analyzed.
    common_splits = splits_in_db.intersection(splits_in_users_list)

    if len(common_splits) != len(splits_in_users_list):
        if not common_splits:
            run.warning('None of the split names you provided in %s matched split names in the database...' % args.splits_txt)
            sys.exit()
        else:
            run.warning('Only %d of %d split names you listed in "%s" matched split names in the database...'\
                                            % (len(common_splits), len(splits_in_users_list), args.splits_txt))

    try:
        results = completeness.get_info_for_splits(common_splits, min_e_value = args.min_e_value)
    except ConfigError as e:
        # `except ... as` and the parenthesized print behave identically on
        # Python 2.6+ and Python 3, unlike the old `except X, e` / `print e`.
        print(e)
        sys.exit(-1)

    # Report the number of splits the results were actually computed for,
    # i.e. the matched ones, not everything listed in the input file.
    run.warning('', header = 'Completeness for %d splits (p < %g)' % (len(common_splits), args.min_e_value))
    for source in completeness.sources:
        run.info(source, '%.2f%% complete, %.2f%% redundant' % (results[source]['percent_complete'], results[source]['percent_redundancy']))

    # Trailing blank line; written explicitly because a bare `print` is a
    # no-op expression on Python 3.
    sys.stdout.write('\n')

