import os
import sys
import re
import subprocess
import urllib
import urllib.request
from html.parser import HTMLParser

from meteor.compiler import Compiler

# TODO:
# 1. Fetching remote (http) resources with urlopen is not finished yet.
# 2. Load CSS from a directory relative to the file that references it.


# Usage text for the "inline" sub-command. It is not referenced by any code
# visible in this file; presumably the CLI dispatcher that imports this module
# reads it -- TODO confirm against the caller.
help_msg = '''
Inline resources of a webpage.

Usage:
    meteor inline file1 file2
    --all          It inline only resources with __inline__ by default, turn this on to inline all.
    --image        base64 Inline image as well.
    --raw          whether resources needs to be compressed
'''

def config(parser):
    """Attach the ``inline`` command's handler and arguments to *parser*.

    ``parser`` is an argparse-style parser (it must provide ``set_defaults``
    and ``add_argument``).  After parsing, the resulting namespace carries
    ``func`` (set to :func:`action`), ``files`` (one or more paths) and the
    boolean switches ``all``, ``image`` and ``raw``.
    """
    parser.set_defaults(func=action)

    # Positional: at least one input file is required.
    parser.add_argument(
        'files',
        metavar='file',
        type=str,
        nargs='+',
        help='list of files to be processed',
    )

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        ('-a', '--all', 'build all'),
        ('-i', '--image', 'inline images'),
        ('-r', '--raw', 'whether compression should be applied'),
    )
    for short_flag, long_flag, description in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', help=description)


class HTMLInliner(HTMLParser):
    """Inline external scripts and stylesheets of an HTML document.

    Feed the document with :meth:`feed`, then call :meth:`get_text` to obtain
    the document with every selected resource replaced by its content.

    A ``<script src=...>`` or ``<link href=...>`` is selected when its URL
    carries an ``__inline__`` query parameter, or unconditionally when
    ``args.all`` is set.  With ``args.all`` the content of inline ``<script>``
    and ``<style>`` elements is processed as well.  Unless ``args.raw`` is
    set, code is passed through the compiler's ``js_compress`` /
    ``css_compress`` (called here with bytes, result decoded back to text).
    With ``args.image``, CSS is first run through ``CssInliner``.

    NOTE: resources are opened from the path component of their URL, relative
    to the current working directory; remote URLs are not fetched.

    Attributes:
        text:  the document passed to the last :meth:`feed` call.
        repls: pending replacements, in document order.  Each item is a dict
               with ``start`` / ``end`` as ``(row, column)`` pairs in the
               convention of ``HTMLParser.getpos()`` (1-based row, 0-based
               column) and ``repl`` holding the replacement text.
    """

    def __init__(self, args):
        # HTMLParser.__init__ takes keyword arguments only; passing ``self``
        # positionally raises TypeError on current Python versions.
        super().__init__()

        self.repls = []
        self.text = ''
        self.__compiler = Compiler()
        self.__path = []  # stack of currently open tag names
        # Must be a real tuple: with a plain string, ``in`` does substring
        # matching and would treat e.g. <li> or <i> as a single tag.
        self.__singletags = ('link',)
        self.__args = args
        # Name of the tag whose closing tag must still be folded into the last
        # replacement, or None when nothing is pending.
        self.__fixend = None

    def feed(self, text):
        """Remember *text* for :meth:`get_text` and parse it."""
        self.text = text
        super().feed(text)

    def _drop_open_singletag(self, closing=None):
        """Pop a single tag left on top of the stack (it has no end tag).

        When *closing* names that very tag, it is kept so that the regular
        end-tag handling can pop it.
        """
        if not self.__path:
            return
        top = self.__path[-1]
        if top in self.__singletags and top != closing:
            self.__path.pop()

    def handle_starttag(self, tag, attrs):
        # A new start tag means the previously replaced element is not going
        # to be closed by an adjacent end tag any more.
        self.__fixend = None

        self._drop_open_singletag()
        self.__path.append(tag)

        attrs_dict = dict(attrs)
        if tag == 'script':
            src = attrs_dict.get('src')
        elif tag == 'link':
            src = attrs_dict.get('href')
        else:
            return
        if not src:
            # no (or empty / valueless) reference: nothing to inline
            return

        urlobj = urllib.parse.urlparse(src)
        qs = urllib.parse.parse_qs(urlobj.query, keep_blank_values=True)
        if not (self.__args.all or '__inline__' in qs):
            return

        with open(urlobj.path, 'r') as f:
            code = f.read()

        if tag == 'link':
            if self.__args.image:
                # inline images in css
                inliner = CssInliner(self.__args)
                inliner.feed(code)
                code = inliner.get_text()
            if not self.__args.raw:
                code = self.__compiler.css_compress(code.encode()).decode()
            # Inlined CSS always lives in <style>, compressed or not; a <link>
            # element cannot carry content.
            out_tag = 'style'
        else:
            if not self.__args.raw:
                code = self.__compiler.js_compress(code.encode()).decode()
            out_tag = 'script'

        # Keep every attribute except the reference itself.  HTMLParser
        # reports valueless attributes (e.g. ``async``) with value None.
        pieces = [out_tag]
        for name, value in attrs:
            if name in ('href', 'src'):
                continue
            pieces.append(name if value is None else '{}="{}"'.format(name, value))
        repl = '<{open}>{code}</{tag}>'.format(
            open=' '.join(pieces), code=code, tag=out_tag)

        pos = self.getpos()
        endcol = pos[1] + len(self.get_starttag_text())
        self.repls.append({'start': pos, 'end': (pos[0], endcol), 'repl': repl})

        # The element's own closing tag, if one follows, must be replaced too.
        self.__fixend = tag

    def handle_data(self, data):
        if not self.__path or self.__fixend:
            # Outside any element, or inside an element that is being
            # replaced as a whole (a second replacement would overlap it).
            return

        tag = self.__path[-1]
        if not self.__args.all or tag not in ('script', 'style'):
            return

        code = data
        if tag == 'script':
            if not self.__args.raw:
                code = self.__compiler.js_compress(data.encode()).decode()
        else:
            if self.__args.image:
                # inline images in css
                inliner = CssInliner(self.__args)
                inliner.feed(data)
                code = inliner.get_text()
            if not self.__args.raw:
                code = self.__compiler.css_compress(code.encode()).decode()

        if code == data:
            # nothing changed, no replacement needed
            return

        row, startcol = self.getpos()
        linetotal = data.count('\n')
        if linetotal:
            # column = number of characters after the last newline
            endcol = len(data) - data.rindex('\n') - 1
        else:
            endcol = startcol + len(data)

        self.repls.append({
            'start': (row, startcol),
            'end': (row + linetotal, endcol),
            'repl': code,
        })

    def handle_endtag(self, tag):
        self._drop_open_singletag(closing=tag)
        if self.__path:  # tolerate stray end tags
            self.__path.pop()

        # For an external replacement the original closing tag is still in the
        # source; stretch the replaced span over it.
        if self.__fixend == tag and self.repls:
            pos = self.getpos()
            last = self.repls[-1]
            # A self-closing tag reports its end at the start tag's own
            # position; its span is already complete.
            if pos != last['start']:
                last['end'] = (pos[0], pos[1] + len(tag) + 3)  # len('</' + tag + '>')
        self.__fixend = None

    def get_text(self):
        '''
        Get text with replaced content
        '''

        if not self.repls:
            return self.text

        # Offset at which every row starts.  HTMLParser counts rows by '\n'
        # only, so str.splitlines() (which also breaks on '\r', '\f', ...)
        # would give different rows and lengths.
        line_starts = []
        offset = 0
        for line in self.text.split('\n'):
            line_starts.append(offset)
            offset += len(line) + 1

        # stitch untouched text and replacements together
        parts = []
        cursor = 0
        for repl in self.repls:
            start_row, start_col = repl['start']
            end_row, end_col = repl['end']
            start = line_starts[start_row - 1] + start_col
            end = line_starts[end_row - 1] + end_col

            parts.append(self.text[cursor:start])
            parts.append(repl['repl'])
            cursor = end

        parts.append(self.text[cursor:])
        return ''.join(parts)


class CssInliner():
    """Replace ``url(...)`` references in a stylesheet with embedded images.

    Feed the stylesheet with :meth:`feed`, then call :meth:`get_text` to get
    the stylesheet with every reference replaced by the (double-quoted) value
    returned by the compiler's ``base64image(data, url)``.

    Local references are opened as given, i.e. relative to the current
    working directory; ``http://`` / ``https://`` references are downloaded.
    References that are already ``data:`` URIs are left untouched.

    Attributes:
        text:  the stylesheet passed to the last :meth:`feed` call.
        repls: pending replacements in document order; each item is a dict
               with integer ``start`` / ``end`` offsets into ``text`` and the
               replacement string ``repl``.
    """

    # captures whatever stands between "url(" and the next ")"
    url_pat = re.compile(r'(?<=url\()(.*?)(?=\))')

    def __init__(self, args):
        self.__args = args
        self.repls = []
        self.text = ''
        self.__compiler = Compiler()

    def feed(self, css):
        """Scan *css* and record a replacement for every ``url(...)`` in it.

        Raises whatever ``open`` / ``urlopen`` raise when a referenced
        resource cannot be read.
        """
        self.text = css
        # Offsets refer to ``css`` only; replacements of a previous feed
        # would point into the wrong text.
        self.repls = []

        for mo in self.url_pat.finditer(css):
            url = mo.group(1).strip().strip('\'\"')
            if not url or url.lower().startswith('data:'):
                # empty reference or already inlined
                continue

            if url.startswith(('http://', 'https://')):
                with urllib.request.urlopen(url) as f:
                    img = f.read()
            else:
                with open(url, 'rb') as f:
                    img = f.read()

            b = self.__compiler.base64image(img, url)
            self.repls.append({'start': mo.start(), 'end': mo.end(), 'repl': '"' + b + '"'})

    def get_text(self):
        '''
        Get text with replaced content
        '''

        if not self.repls:
            return self.text

        # stitch untouched text and replacements together
        parts = []
        cursor = 0
        for repl in self.repls:
            parts.append(self.text[cursor:repl['start']])
            parts.append(repl['repl'])
            cursor = repl['end']

        parts.append(self.text[cursor:])
        return ''.join(parts)


def action(args):
    """Inline the resources of every supported file in ``args.files``.

    Files ending in ``.html`` / ``.htm`` are processed with ``HTMLInliner``,
    files ending in ``.css`` with ``CssInliner``; anything else is skipped.
    The result is written next to the input as ``<name>.inlined<ext>``.

    ``args`` is the parsed command-line namespace; it is handed to the
    inliners unchanged.
    """
    for fpath in args.files:
        path, ext = os.path.splitext(fpath)
        if ext not in ('.html', '.htm', '.css'):
            continue

        with open(fpath, 'r') as f:
            code = f.read()

        if ext == '.css':
            inliner = CssInliner(args)
        else:
            inliner = HTMLInliner(args)

        inliner.feed(code)
        code = inliner.get_text()

        # ``with`` closes the output file even when writing fails
        with open(path + '.inlined' + ext, 'w') as out:
            out.write(code)

