0a122b
From db918ab21be13a3c1f3c65d3821c06cf12528099 Mon Sep 17 00:00:00 2001
0a122b
Message-Id: <db918ab21be13a3c1f3c65d3821c06cf12528099.1387369730.git.minovotn@redhat.com>
0a122b
In-Reply-To: <091eecc4fa42754760dfff393dabcc2b444e9693.1387369730.git.minovotn@redhat.com>
0a122b
References: <091eecc4fa42754760dfff393dabcc2b444e9693.1387369730.git.minovotn@redhat.com>
0a122b
From: Markus Armbruster <armbru@redhat.com>
0a122b
Date: Tue, 10 Dec 2013 15:29:13 +0100
0a122b
Subject: [PATCH 13/21] qapi.py: Restructure lexer and parser
0a122b
0a122b
RH-Author: Markus Armbruster <armbru@redhat.com>
0a122b
Message-id: <1386689361-30281-11-git-send-email-armbru@redhat.com>
0a122b
Patchwork-id: 56127
0a122b
O-Subject: [PATCH 7.0 qemu-kvm 10/18] qapi.py: Restructure lexer and parser
0a122b
Bugzilla: 997915
0a122b
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
0a122b
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
0a122b
RH-Acked-by: Luiz Capitulino <lcapitulino@redhat.com>
0a122b
0a122b
From: Markus Armbruster <armbru@redhat.com>
0a122b
0a122b
The parser has a rather unorthodox structure:
0a122b
0a122b
    Until EOF:
0a122b
0a122b
        Read a section:
0a122b
0a122b
            Generator function get_expr() yields one section after the
0a122b
            other, as a string.  An unindented, non-empty line that
0a122b
            isn't a comment starts a new section.
0a122b
0a122b
        Lexing:
0a122b
0a122b
            Split section into a list of tokens (strings), with help
0a122b
            of generator function tokenize().
0a122b
0a122b
        Parsing:
0a122b
0a122b
            Parse the first expression from the list of tokens, with
0a122b
            parse(), throw away any remaining tokens.
0a122b
0a122b
            In parse_schema(): record value of an enum, union or
0a122b
            struct key (if any) in the appropriate global table,
0a122b
            append expression to the list of expressions.
0a122b
0a122b
    Return list of expressions.
0a122b
0a122b
Known issues:
0a122b
0a122b
(1) Indentation is significant, unlike in real JSON.
0a122b
0a122b
(2) Neither lexer nor parser has any idea of source positions.  Error
0a122b
    reporting is hard, let's go shopping.
0a122b
0a122b
(3) The one error we bother to detect, we "report" via raise.
0a122b
0a122b
(4) The lexer silently ignores invalid characters.
0a122b
0a122b
(5) If everything in a section gets ignored, the parser crashes.
0a122b
0a122b
(6) The lexer treats a string containing a structural character exactly
0a122b
    like the structural character.
0a122b
0a122b
(7) Tokens trailing the first expression in a section are silently
0a122b
    ignored.
0a122b
0a122b
(8) The parser accepts any token in place of a colon.
0a122b
0a122b
(9) The parser treats comma as optional.
0a122b
0a122b
(10) parse() crashes on unexpected EOF.
0a122b
0a122b
(11) parse_schema() crashes when a section's expression isn't a JSON
0a122b
    object.
0a122b
0a122b
Replace this piece of original art by a thoroughly unoriginal design.
0a122b
Takes care of (1), (2), (5), (6) and (7), and lays the groundwork for
0a122b
addressing the others.  Generated source files remain unchanged.
0a122b
0a122b
Signed-off-by: Markus Armbruster <armbru@redhat.com>
0a122b
Reviewed-by: Eric Blake <eblake@redhat.com>
0a122b
Message-id: 1374939721-7876-4-git-send-email-armbru@redhat.com
0a122b
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
0a122b
(cherry picked from commit c7a3f25200c8692e969f21c7f2555630ec0d0d30)
0a122b
---
0a122b
 scripts/qapi.py                                | 163 +++++++++++++------------
0a122b
 tests/qapi-schema/indented-expr.out            |   2 +-
0a122b
 tests/qapi-schema/missing-colon.out            |   4 +-
0a122b
 tests/qapi-schema/quoted-structural-chars.err  |   1 +
0a122b
 tests/qapi-schema/quoted-structural-chars.exit |   2 +-
0a122b
 tests/qapi-schema/quoted-structural-chars.out  |   3 -
0a122b
 6 files changed, 88 insertions(+), 87 deletions(-)
0a122b
0a122b
Signed-off-by: Michal Novotny <minovotn@redhat.com>
0a122b
---
0a122b
 scripts/qapi.py                                | 163 +++++++++++++------------
0a122b
 tests/qapi-schema/indented-expr.out            |   2 +-
0a122b
 tests/qapi-schema/missing-colon.out            |   4 +-
0a122b
 tests/qapi-schema/quoted-structural-chars.err  |   1 +
0a122b
 tests/qapi-schema/quoted-structural-chars.exit |   2 +-
0a122b
 tests/qapi-schema/quoted-structural-chars.out  |   3 -
0a122b
 6 files changed, 88 insertions(+), 87 deletions(-)
0a122b
0a122b
diff --git a/scripts/qapi.py b/scripts/qapi.py
0a122b
index 38c808e..58e315b 100644
0a122b
--- a/scripts/qapi.py
0a122b
+++ b/scripts/qapi.py
0a122b
@@ -2,9 +2,11 @@
0a122b
 # QAPI helper library
0a122b
 #
0a122b
 # Copyright IBM, Corp. 2011
0a122b
+# Copyright (c) 2013 Red Hat Inc.
0a122b
 #
0a122b
 # Authors:
0a122b
 #  Anthony Liguori <aliguori@us.ibm.com>
0a122b
+#  Markus Armbruster <armbru@redhat.com>
0a122b
 #
0a122b
 # This work is licensed under the terms of the GNU GPLv2.
0a122b
 # See the COPYING.LIB file in the top-level directory.
0a122b
@@ -32,91 +34,92 @@ builtin_type_qtypes = {
0a122b
     'uint64':   'QTYPE_QINT',
0a122b
 }
0a122b
 
0a122b
-def tokenize(data):
0a122b
-    while len(data):
0a122b
-        ch = data[0]
0a122b
-        data = data[1:]
0a122b
-        if ch in ['{', '}', ':', ',', '[', ']']:
0a122b
-            yield ch
0a122b
-        elif ch in ' \n':
0a122b
-            None
0a122b
-        elif ch == "'":
0a122b
-            string = ''
0a122b
-            esc = False
0a122b
-            while True:
0a122b
-                if (data == ''):
0a122b
-                    raise Exception("Mismatched quotes")
0a122b
-                ch = data[0]
0a122b
-                data = data[1:]
0a122b
-                if esc:
0a122b
-                    string += ch
0a122b
-                    esc = False
0a122b
-                elif ch == "\\":
0a122b
-                    esc = True
0a122b
-                elif ch == "'":
0a122b
-                    break
0a122b
-                else:
0a122b
-                    string += ch
0a122b
-            yield string
0a122b
-
0a122b
-def parse(tokens):
0a122b
-    if tokens[0] == '{':
0a122b
-        ret = OrderedDict()
0a122b
-        tokens = tokens[1:]
0a122b
-        while tokens[0] != '}':
0a122b
-            key = tokens[0]
0a122b
-            tokens = tokens[1:]
0a122b
-
0a122b
-            tokens = tokens[1:] # :
0a122b
-
0a122b
-            value, tokens = parse(tokens)
0a122b
-
0a122b
-            if tokens[0] == ',':
0a122b
-                tokens = tokens[1:]
0a122b
-
0a122b
-            ret[key] = value
0a122b
-        tokens = tokens[1:]
0a122b
-        return ret, tokens
0a122b
-    elif tokens[0] == '[':
0a122b
-        ret = []
0a122b
-        tokens = tokens[1:]
0a122b
-        while tokens[0] != ']':
0a122b
-            value, tokens = parse(tokens)
0a122b
-            if tokens[0] == ',':
0a122b
-                tokens = tokens[1:]
0a122b
-            ret.append(value)
0a122b
-        tokens = tokens[1:]
0a122b
-        return ret, tokens
0a122b
-    else:
0a122b
-        return tokens[0], tokens[1:]
0a122b
-
0a122b
-def evaluate(string):
0a122b
-    return parse(map(lambda x: x, tokenize(string)))[0]
0a122b
-
0a122b
-def get_expr(fp):
0a122b
-    expr = ''
0a122b
-
0a122b
-    for line in fp:
0a122b
-        if line.startswith('#') or line == '\n':
0a122b
-            continue
0a122b
-
0a122b
-        if line.startswith(' '):
0a122b
-            expr += line
0a122b
-        elif expr:
0a122b
-            yield expr
0a122b
-            expr = line
0a122b
+class QAPISchema:
0a122b
+
0a122b
+    def __init__(self, fp):
0a122b
+        self.fp = fp
0a122b
+        self.src = fp.read()
0a122b
+        if self.src == '' or self.src[-1] != '\n':
0a122b
+            self.src += '\n'
0a122b
+        self.cursor = 0
0a122b
+        self.exprs = []
0a122b
+        self.accept()
0a122b
+
0a122b
+        while self.tok != None:
0a122b
+            self.exprs.append(self.get_expr())
0a122b
+
0a122b
+    def accept(self):
0a122b
+        while True:
0a122b
+            bol = self.cursor == 0 or self.src[self.cursor-1] == '\n'
0a122b
+            self.tok = self.src[self.cursor]
0a122b
+            self.cursor += 1
0a122b
+            self.val = None
0a122b
+
0a122b
+            if self.tok == '#' and bol:
0a122b
+                self.cursor = self.src.find('\n', self.cursor)
0a122b
+            elif self.tok in ['{', '}', ':', ',', '[', ']']:
0a122b
+                return
0a122b
+            elif self.tok == "'":
0a122b
+                string = ''
0a122b
+                esc = False
0a122b
+                while True:
0a122b
+                    ch = self.src[self.cursor]
0a122b
+                    self.cursor += 1
0a122b
+                    if ch == '\n':
0a122b
+                        raise Exception("Mismatched quotes")
0a122b
+                    if esc:
0a122b
+                        string += ch
0a122b
+                        esc = False
0a122b
+                    elif ch == "\\":
0a122b
+                        esc = True
0a122b
+                    elif ch == "'":
0a122b
+                        self.val = string
0a122b
+                        return
0a122b
+                    else:
0a122b
+                        string += ch
0a122b
+            elif self.tok == '\n':
0a122b
+                if self.cursor == len(self.src):
0a122b
+                    self.tok = None
0a122b
+                    return
0a122b
+
0a122b
+    def get_members(self):
0a122b
+        expr = OrderedDict()
0a122b
+        while self.tok != '}':
0a122b
+            key = self.val
0a122b
+            self.accept()
0a122b
+            self.accept()        # :
0a122b
+            expr[key] = self.get_expr()
0a122b
+            if self.tok == ',':
0a122b
+                self.accept()
0a122b
+        self.accept()
0a122b
+        return expr
0a122b
+
0a122b
+    def get_values(self):
0a122b
+        expr = []
0a122b
+        while self.tok != ']':
0a122b
+            expr.append(self.get_expr())
0a122b
+            if self.tok == ',':
0a122b
+                self.accept()
0a122b
+        self.accept()
0a122b
+        return expr
0a122b
+
0a122b
+    def get_expr(self):
0a122b
+        if self.tok == '{':
0a122b
+            self.accept()
0a122b
+            expr = self.get_members()
0a122b
+        elif self.tok == '[':
0a122b
+            self.accept()
0a122b
+            expr = self.get_values()
0a122b
         else:
0a122b
-            expr += line
0a122b
-
0a122b
-    if expr:
0a122b
-        yield expr
0a122b
+            expr = self.val
0a122b
+            self.accept()
0a122b
+        return expr
0a122b
 
0a122b
 def parse_schema(fp):
0a122b
+    schema = QAPISchema(fp)
0a122b
     exprs = []
0a122b
 
0a122b
-    for expr in get_expr(fp):
0a122b
-        expr_eval = evaluate(expr)
0a122b
-
0a122b
+    for expr_eval in schema.exprs:
0a122b
         if expr_eval.has_key('enum'):
0a122b
             add_enum(expr_eval['enum'])
0a122b
         elif expr_eval.has_key('union'):
0a122b
diff --git a/tests/qapi-schema/indented-expr.out b/tests/qapi-schema/indented-expr.out
0a122b
index 98ae692..98af89a 100644
0a122b
--- a/tests/qapi-schema/indented-expr.out
0a122b
+++ b/tests/qapi-schema/indented-expr.out
0a122b
@@ -1,3 +1,3 @@
0a122b
-[OrderedDict([('id', 'eins')])]
0a122b
+[OrderedDict([('id', 'eins')]), OrderedDict([('id', 'zwei')])]
0a122b
 []
0a122b
 []
0a122b
diff --git a/tests/qapi-schema/missing-colon.out b/tests/qapi-schema/missing-colon.out
0a122b
index 50f827e..e67068c 100644
0a122b
--- a/tests/qapi-schema/missing-colon.out
0a122b
+++ b/tests/qapi-schema/missing-colon.out
0a122b
@@ -1,3 +1,3 @@
0a122b
-[OrderedDict([('enum', ','), ('data', ['good', 'bad', 'ugly'])])]
0a122b
-[',']
0a122b
+[OrderedDict([('enum', None), ('data', ['good', 'bad', 'ugly'])])]
0a122b
+[None]
0a122b
 []
0a122b
diff --git a/tests/qapi-schema/quoted-structural-chars.err b/tests/qapi-schema/quoted-structural-chars.err
0a122b
index e69de29..48c849d 100644
0a122b
--- a/tests/qapi-schema/quoted-structural-chars.err
0a122b
+++ b/tests/qapi-schema/quoted-structural-chars.err
0a122b
@@ -0,0 +1 @@
0a122b
+Crashed: <type 'exceptions.AttributeError'>
0a122b
diff --git a/tests/qapi-schema/quoted-structural-chars.exit b/tests/qapi-schema/quoted-structural-chars.exit
0a122b
index 573541a..d00491f 100644
0a122b
--- a/tests/qapi-schema/quoted-structural-chars.exit
0a122b
+++ b/tests/qapi-schema/quoted-structural-chars.exit
0a122b
@@ -1 +1 @@
0a122b
-0
0a122b
+1
0a122b
diff --git a/tests/qapi-schema/quoted-structural-chars.out b/tests/qapi-schema/quoted-structural-chars.out
0a122b
index 85405be..e69de29 100644
0a122b
--- a/tests/qapi-schema/quoted-structural-chars.out
0a122b
+++ b/tests/qapi-schema/quoted-structural-chars.out
0a122b
@@ -1,3 +0,0 @@
0a122b
-[OrderedDict([('key1', 'value1'), ('key2', [])])]
0a122b
-[]
0a122b
-[]
0a122b
-- 
0a122b
1.7.11.7
0a122b