#    Gedit snippets plugin
#    Copyright (C) 2006-2007  Jesse van den Kieboom <jesse@icecrew.nl>
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import re

class Token:
    """A single lexical token: a category name plus its payload.

    Attributes are plain and public:
      klass -- the token category (a string such as 'text')
      data  -- the payload; its type depends on the category
    """

    def __init__(self, klass, data):
        self.klass = klass
        self.data = data

    def __str__(self):
        return '%s: [%s]' % (self.klass, self.data)

    def __repr__(self):
        return 'Token(%r, %r)' % (self.klass, self.data)

    def __eq__(self, other):
        """Tokens are equal when both category and payload are equal.

        Comparing against anything that is not a Token (None, a string, ...)
        yields NotImplemented so Python falls back to its default handling
        and the comparison evaluates to False instead of raising
        AttributeError.
        """
        if not isinstance(other, Token):
            return NotImplemented

        return self.klass == other.klass and self.data == other.data

    def __ne__(self, other):
        equal = self.__eq__(other)

        # Propagate NotImplemented untouched; negating it would claim
        # "equal" for foreign types.
        if equal is NotImplemented:
            return equal

        return not equal

    # Value equality on mutable attributes: instances are deliberately
    # unhashable.
    __hash__ = None

class Parser:
    """Incremental tokenizer for snippet text.

    Construct with keyword arguments; every keyword becomes an attribute of
    the instance.  A ``data`` keyword holding the text to tokenize is
    required, because the constructor reads ``self.data``.  Call token()
    repeatedly to obtain Token objects; it returns None once the input is
    exhausted.

    Token categories produced: 'text', 'environment', 'regex',
    'placeholder', 'shell' and 'eval'.
    """

    SREG_ENV = '[A-Z_]+'
    SREG_ID = '[0-9]+'

    # Constructs that a preceding backslash can escape: the start of a
    # $-construct, a backtick, or another backslash.
    REG_ESCAPE = re.compile('(\\$(%s|\\(|\\{|<|%s)|`|\\\\)' % (SREG_ENV, SREG_ID))

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.position = 0
        self.data_length = len(self.data)

        # Tried in order by _rule(); _text is last so an unmatched '$' is
        # consumed as ordinary text.
        self.RULES = (self._match_env, self._match_regex, self._match_placeholder, self._match_shell, self._match_eval, self._text)

    @staticmethod
    def _int_or(value, fallback):
        """Return int(value), or fallback when value is None or empty.

        Unlike the ``value and int(value) or fallback`` idiom this keeps an
        explicit '0' as 0 rather than replacing it with the fallback.
        """
        if value:
            return int(value)

        return fallback

    def remains(self):
        """Return the part of the input that has not been consumed yet."""
        return self.data[self.position:]

    def next_char(self):
        """Return the character after the current one, or '' at the end."""
        following = self.position + 1

        if following >= self.data_length:
            return ''

        return self.data[following]

    def char(self):
        """Return the character at the current position, or '' at the end."""
        if self.position >= self.data_length:
            return ''

        return self.data[self.position]

    def token(self):
        """Return the next Token, or None when the input is exhausted.

        Plain characters are accumulated in self.tktext; the accumulated
        text is emitted as a 'text' token as soon as a character that may
        start another construct ('$' or a backtick) is reached.
        """
        self.tktext = ''

        # First character of a construct -> handler; anything else is text.
        dispatch = {'$': self._rule,
                    '`': self._try_match_shell}

        while self.position < self.data_length:
            func = dispatch.get(self.char(), self._text)

            # Detect end of text token
            if func != self._text and self.tktext != '':
                return Token('text', self.tktext)

            tk = func()

            if tk:
                return tk

        if self.tktext != '':
            return Token('text', self.tktext)

        return None

    def _need_escape(self):
        """Tell whether the text after the current character is escapable.

        Returns a false value when nothing follows the current character or
        when what follows does not match REG_ESCAPE; otherwise returns the
        match object.
        """
        text = self.remains()[1:]

        if text == '':
            return False

        return self.REG_ESCAPE.match(text)

    def _escape(self):
        """Skip the current backslash when it escapes what follows."""
        if self._need_escape():
            self.position += 1

    def _text(self):
        """Consume one character into the pending text; returns None."""
        if self.char() == '\\':
            self._escape()

        self.tktext += self.char()
        self.position += 1

    def _rule(self):
        """Run the rules in order and return the first truthy result."""
        for rule in self.RULES:
            res = rule()

            if res:
                return res

        return None

    def _match_env(self):
        """Match $NAME or ${NAME}; return an 'environment' token or None."""
        text = self.remains()
        match = re.match('\\$(%s)' % self.SREG_ENV, text) or re.match('\\${(%s)}' % self.SREG_ENV, text)

        if not match:
            return None

        self.position += len(match.group(0))
        return Token('environment', match.group(1))

    def _parse_list(self, lst):
        """Split lst on commas, honouring the escapes '\\,' and '\\]'.

        Empty items (nothing between two commas, or a leading/trailing
        comma) are dropped.  Whitespace is kept as part of the items.
        """
        items = []
        current = None
        pos = 0
        length = len(lst)

        while pos < length:
            char = lst[pos]
            following = lst[pos + 1] if pos + 1 < length else None

            if char == '\\' and following in (',', ']'):
                # Escaped separator/bracket: take it literally.
                char = following
                pos += 1
            elif char == ',':
                if current is not None:
                    items.append(current)

                current = None
                pos += 1
                continue

            current = char if current is None else current + char
            pos += 1

        if current is not None:
            items.append(current)

        return items

    def _parse_default(self, default):
        """Turn a placeholder default into a list of choices.

        '[a,b]' becomes ['a', 'b'].  A bracketed value preceded by a
        backslash is returned as a single literal item without the
        backslash.  Anything else is returned unchanged as a one-item list.
        """
        match = re.match('^\\s*(\\\\)?(\\[((\\\\]|[^\\]])+)\\]\\s*)$', default)

        if not match:
            return [default]

        groups = match.groups()

        if groups[0]:
            return [groups[1]]

        return self._parse_list(groups[2])

    def _match_placeholder(self):
        """Match ${N}, ${N:default} or $N; return a 'placeholder' token.

        The token data holds 'tabstop' (int) and 'default', which is '' when
        no default was given and otherwise the list from _parse_default().
        Returns None when nothing matches.
        """
        text = self.remains()

        match = re.match('\\${(%s)(:((\\\\\\}|[^}])+))?}' % self.SREG_ID, text) or re.match('\\$(%s)' % self.SREG_ID, text)

        if not match:
            return None

        groups = match.groups()
        default = ''
        tabstop = int(groups[0])
        self.position += len(match.group(0))

        # Only the braced form has the extra groups carrying a default.
        if len(groups) > 1 and groups[2]:
            default = self._parse_default(groups[2].replace('\\}', '}'))

        return Token('placeholder', {'tabstop': tabstop, 'default': default})

    def _match_shell(self):
        """Match `cmd`, `N:cmd`, $(cmd) or $(N:cmd); return a 'shell' token.

        The token data holds 'tabstop' (-1 when none was given) and
        'contents' with the escaped closing delimiter unescaped.  Returns
        None when nothing matches.
        """
        text = self.remains()
        match = re.match('`((%s):)?((\\\\`|[^`])+?)`' % self.SREG_ID, text) or re.match('\\$\\(((%s):)?((\\\\\\)|[^\\)])+?)\\)' % self.SREG_ID, text)

        if not match:
            return None

        groups = match.groups()
        # An explicit tabstop of 0 must stay 0, not fall back to -1.
        tabstop = self._int_or(groups[1], -1)
        self.position += len(match.group(0))

        if text[0] == '`':
            contents = groups[2].replace('\\`', '`')
        else:
            contents = groups[2].replace('\\)', ')')

        return Token('shell', {'tabstop': tabstop, 'contents': contents})

    def _try_match_shell(self):
        """Match a shell construct, else consume the backtick as text."""
        return self._match_shell() or self._text()

    def _eval_options(self, options):
        """Parse the colon separated option prefix of an eval construct.

        Options starting with digits set the tabstop; any other option is
        taken to be a bracketed list whose items are added to the
        dependencies (as strings).  Returns (tabstop, dependencies) with
        tabstop -1 when none was given.
        """
        reg = re.compile(self.SREG_ID)
        tabstop = -1
        depend = []

        for opt in options.split(':'):
            if reg.match(opt):
                tabstop = int(opt)
            else:
                # Strip the surrounding brackets before splitting.
                depend += self._parse_list(opt[1:-1])

        return (tabstop, depend)

    def _match_eval(self):
        """Match $<code> with optional 'N:' / '[a, b]:' prefixes.

        Returns an 'eval' token whose data holds 'tabstop', 'dependencies'
        and 'contents' (with '\\>' unescaped), or None when nothing matches.
        """
        text = self.remains()

        options = '((%s)|\\[([0-9, ]+)\\])' % self.SREG_ID
        match = re.match('\\$<((%s:)*)((\\\\>|[^>])+?)>' % options, text)

        if not match:
            return None

        groups = match.groups()

        if groups[0]:
            # Drop the trailing ':' of the option prefix.
            (tabstop, depend) = self._eval_options(groups[0][:-1])
        else:
            (tabstop, depend) = (-1, [])

        self.position += len(match.group(0))

        return Token('eval', {'tabstop': tabstop, 'dependencies': depend, 'contents': groups[5].replace('\\>', '>')})

    def _match_regex(self):
        """Match ${[N:][input]/pattern/substitution[/modifiers]}.

        Returns a 'regex' token whose data holds:
          tabstop      -- int, -1 when none was given
          input        -- environment variable name (str), placeholder
                          number (int), or '' when none was given
          pattern, substitution -- with '\\/' and '\\}' unescaped
          modifiers    -- letters after the last '/', '' when absent
        Returns None when nothing matches.
        """
        text = self.remains()

        content = '((?:\\\\[/]|\\\\}|[^/}])+)'
        match = re.match('\\${(?:(%s):)?\\s*(%s|\\$([A-Z_]+))?[/]%s[/]%s(?:[/]([a-zA-Z]*))?}' % (self.SREG_ID, self.SREG_ID, content, content), text)

        if not match:
            return None

        groups = match.groups()
        # Explicit 0 values must survive; only a missing value falls back.
        tabstop = self._int_or(groups[0], -1)

        if groups[2]:
            inp = groups[2]
        else:
            inp = self._int_or(groups[1], '')

        pattern = re.sub('\\\\([/}])', '\\1', groups[3])
        substitution = re.sub('\\\\([/}])', '\\1', groups[4])
        modifiers = groups[5] or ''

        self.position += len(match.group(0))

        return Token('regex', {'tabstop': tabstop, 'input': inp, 'pattern': pattern, 'substitution': substitution, 'modifiers': modifiers})

# ex:ts=4:et:
