#!/usr/bin/env python
from optparse import OptionParser
import numpy as np
import pandas
import panama
import os
import sys

def parse_command_line(argv=None):
    """Parse the command line and return ``(options, args)``.

    Parameters:
        argv: list of argument strings to parse; ``None`` means
            ``sys.argv[1:]`` (the optparse default).

    Returns:
        The ``(options, args)`` pair produced by optparse. ``args`` holds at
        least two entries: the gene expression file and the SNP file.

    Exits (via ``parser.error``, status 2, message on stderr) when fewer than
    two positional arguments are given.
    """
    usage = "usage: %prog [options] gene_expr_file snp_file"
    parser = OptionParser(usage=usage)

    parser.add_option('-Q', '--latent-dim', dest='dim', action='store',
                      type='int', default=None,
                      help='Fix latent dimensionality to the value specified')
    parser.add_option('-c', '--covariates', dest='cov', action='store',
                      type='str', default=None,
                      help='Covariates file (optional)')
    parser.add_option('-d', '--output-dir', dest='output_directory',
                      action='store', type='str', default='./',
                      help='Output directory')
    # No explicit default: options.pop is None unless -P is given, exactly as
    # before, so the value handed to panama is unchanged.
    parser.add_option('-P', '--population-structure', dest='pop',
                      action='store_true',
                      help='Account for population structure (kinship matrix)')
    # The default is a real float rather than a string that optparse has to
    # convert.
    parser.add_option('-a', '--fdr-assoc', dest='fdr', action='store',
                      type='float', default=0.05,
                      help='FDR cutoff for PANAMA')

    (options, args) = parser.parse_args(argv)

    # Both positional files are required; a single file used to slip past the
    # check and crash later with an IndexError, and the error path used to
    # exit with status 0.
    if len(args) < 2:
        parser.error("expected gene_expr_file and snp_file, got %d "
                     "positional argument(s)" % len(args))

    return options, args


def check_same_samples(expected, actual, message):
    """Raise ``ValueError(message)`` unless both indexes list the same samples.

    Uses ``Index.equals`` so that a different number of samples is reported
    with ``message`` as well, and so the check is not stripped under
    ``python -O`` the way an ``assert`` would be.
    """
    if not expected.equals(actual):
        raise ValueError(message)


def main(argv=None):
    """Run the PANAMA association scan and write the result tables as CSV.

    Reads the expression and SNP tables (first column as row index, header
    row as sample names), optionally a covariates table, fits
    ``panama.core.ConfounderGPLVM`` and writes ``PANAMA_qv.csv`` and
    ``PANAMA_pv.csv`` into the output directory.
    """
    options, args = parse_command_line(argv)

    expr = pandas.read_csv(args[0], header=0, index_col=0)
    snps = pandas.read_csv(args[1], header=0, index_col=0)

    check_same_samples(expr.columns, snps.columns,
                       "the samples are not in the same order!")
    gene_names = expr.index
    snp_names = snps.index

    cov = options.cov
    if cov is not None:
        cov = pandas.read_csv(cov, header=0, index_col=0)
        check_same_samples(expr.columns, cov.columns,
                           "the covariate samples do not match the "
                           "expression samples!")
        cov = cov.T.values

    # The tables are transposed before being handed to the model, so samples
    # (the file columns) become rows.
    Y = expr.T.values
    S = snps.T.values

    conf = panama.core.ConfounderGPLVM(Y, S, covariates=cov,
                                       population_structure=options.pop,
                                       num_factors=options.dim,
                                       FDR_assoc=options.fdr)
    conf.fit()
    # NOTE(review): presumably q-values and p-values, one row per SNP and one
    # column per gene -- confirm against panama's association_scan().
    qv, pv = conf.association_scan()

    # Write the results to file
    results_qv = pandas.DataFrame(qv, index=snp_names, columns=gene_names)
    results_pv = pandas.DataFrame(pv, index=snp_names, columns=gene_names)

    if not os.path.exists(options.output_directory):
        os.makedirs(options.output_directory)

    results_qv.to_csv(os.path.join(options.output_directory, "PANAMA_qv.csv"),
                      sep=',')
    results_pv.to_csv(os.path.join(options.output_directory, "PANAMA_pv.csv"),
                      sep=',')


if __name__ == "__main__":
    main()
