|
|
132810 |
From 17d4495bef5c3878bb38730ff0d849415b52641a Mon Sep 17 00:00:00 2001
|
|
|
132810 |
From: Serhei Makarov <smakarov@redhat.com>
|
|
|
132810 |
Date: Mon, 1 Oct 2018 15:38:16 -0400
|
|
|
132810 |
Subject: [PATCH 02/32] stapbpf assembler WIP #1 :: basic parser and control
|
|
|
132810 |
flow
|
|
|
132810 |
|
|
|
132810 |
---
|
|
|
132810 |
bpf-internal.h | 7 +-
|
|
|
132810 |
bpf-opt.cxx | 2 +-
|
|
|
132810 |
bpf-translate.cxx | 745 +++++++++++++++++++++++++++++++++++++++++++-----------
|
|
|
132810 |
parse.h | 14 +
|
|
|
132810 |
4 files changed, 619 insertions(+), 149 deletions(-)
|
|
|
132810 |
|
|
|
132810 |
diff --git a/bpf-internal.h b/bpf-internal.h
|
|
|
132810 |
index 17a033533..719446db8 100644
|
|
|
132810 |
--- a/bpf-internal.h
|
|
|
132810 |
+++ b/bpf-internal.h
|
|
|
132810 |
@@ -261,9 +261,10 @@ struct program
|
|
|
132810 |
void print(std::ostream &) const;
|
|
|
132810 |
};
|
|
|
132810 |
|
|
|
132810 |
-// ??? Properly belongs to bpf_unparser but must be accessible from bpf-opt.cxx:
|
|
|
132810 |
-value *emit_literal_str(program &this_prog, insn_inserter &this_ins,
|
|
|
132810 |
- value *dest, int ofs, std::string &src, bool zero_pad = false);
|
|
|
132810 |
+// ??? Properly belongs to bpf_unparser but must be visible from bpf-opt.cxx:
|
|
|
132810 |
+value *emit_simple_literal_str(program &this_prog, insn_inserter &this_ins,
|
|
|
132810 |
+ value *dest, int ofs, std::string &src,
|
|
|
132810 |
+ bool zero_pad = false);
|
|
|
132810 |
|
|
|
132810 |
inline std::ostream&
|
|
|
132810 |
operator<< (std::ostream &o, const program &c)
|
|
|
132810 |
diff --git a/bpf-opt.cxx b/bpf-opt.cxx
|
|
|
132810 |
index 0f64d826d..c2e30a690 100644
|
|
|
132810 |
--- a/bpf-opt.cxx
|
|
|
132810 |
+++ b/bpf-opt.cxx
|
|
|
132810 |
@@ -41,7 +41,7 @@ alloc_literal_str(program &p, insn_inserter &ins, std::string &str)
|
|
|
132810 |
int ofs = -tmp_space;
|
|
|
132810 |
|
|
|
132810 |
value *frame = p.lookup_reg(BPF_REG_10);
|
|
|
132810 |
- value *out = emit_literal_str(p, ins, frame, ofs, str, false /* don't zero pad */);
|
|
|
132810 |
+ value *out = emit_simple_literal_str(p, ins, frame, ofs, str, false /* don't zero pad */);
|
|
|
132810 |
return out;
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
diff --git a/bpf-translate.cxx b/bpf-translate.cxx
|
|
|
132810 |
index d848c9f16..023ac6ce7 100644
|
|
|
132810 |
--- a/bpf-translate.cxx
|
|
|
132810 |
+++ b/bpf-translate.cxx
|
|
|
132810 |
@@ -8,6 +8,7 @@
|
|
|
132810 |
|
|
|
132810 |
#include "config.h"
|
|
|
132810 |
#include "bpf-internal.h"
|
|
|
132810 |
+#include "parse.h"
|
|
|
132810 |
#include "staptree.h"
|
|
|
132810 |
#include "elaborate.h"
|
|
|
132810 |
#include "session.h"
|
|
|
132810 |
@@ -134,6 +135,9 @@ has_side_effects (expression *e)
|
|
|
132810 |
return t.side_effects;
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
+/* forward declaration */
|
|
|
132810 |
+struct asm_stmt;
|
|
|
132810 |
+
|
|
|
132810 |
struct bpf_unparser : public throwing_visitor
|
|
|
132810 |
{
|
|
|
132810 |
// The visitor class isn't as helpful as it might be. As a consequence,
|
|
|
132810 |
@@ -233,10 +237,19 @@ struct bpf_unparser : public throwing_visitor
|
|
|
132810 |
value *emit_expr(expression *e);
|
|
|
132810 |
value *emit_bool(expression *e);
|
|
|
132810 |
value *emit_context_var(bpf_context_vardecl *v);
|
|
|
132810 |
- value *parse_reg(const std::string &str, embeddedcode *s);
|
|
|
132810 |
|
|
|
132810 |
- // Used for copying string data:
|
|
|
132810 |
- value *emit_copied_str(value *dest, int ofs, value *src, bool zero_pad = false);
|
|
|
132810 |
+ // Used for the embedded-code assembler:
|
|
|
132810 |
+ size_t parse_asm_stmt (embeddedcode *s, size_t start,
|
|
|
132810 |
+ /*OUT*/asm_stmt &stmt);
|
|
|
132810 |
+ value *emit_asm_arg(const asm_stmt &stmt, const std::string &reg,
|
|
|
132810 |
+ bool allow_imm = true);
|
|
|
132810 |
+ value *emit_asm_reg(const asm_stmt &stmt, const std::string &reg);
|
|
|
132810 |
+ void emit_asm_opcode(const asm_stmt &stmt,
|
|
|
132810 |
+ std::map<std::string, block *> label_map);
|
|
|
132810 |
+
|
|
|
132810 |
+ // Used for string data:
|
|
|
132810 |
+ value *emit_literal_string(const std::string &str, const token *tok);
|
|
|
132810 |
+ value *emit_string_copy(value *dest, int ofs, value *src, bool zero_pad = false);
|
|
|
132810 |
|
|
|
132810 |
// Used for passing long and string arguments on the stack where an address is expected:
|
|
|
132810 |
void emit_long_arg(value *arg, int ofs, value *val);
|
|
|
132810 |
@@ -552,172 +565,604 @@ bpf_unparser::visit_block (::block *s)
|
|
|
132810 |
emit_stmt (s->statements[i]);
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
+/* WORK IN PROGRESS: A simple eBPF assembler.
|
|
|
132810 |
+
|
|
|
132810 |
+ In order to effectively write eBPF tapset functions, we want to use
|
|
|
132810 |
+ embedded-code assembly rather than compile from SystemTap code. At
|
|
|
132810 |
+ the same time, we want to hook into stapbpf functionality to
|
|
|
132810 |
+ reserve stack memory, allocate virtual registers or signal errors.
|
|
|
132810 |
+
|
|
|
132810 |
+ The assembler syntax will probably take a couple of attempts to get
|
|
|
132810 |
+ just right. This attempt keeps things as close as possible to the
|
|
|
132810 |
+ first embedded-code assembler, with a few more features and a
|
|
|
132810 |
+ disgustingly lenient parser that allows things like
|
|
|
132810 |
+ $ this is all one "**identifier**" believe-it!-or-not
|
|
|
132810 |
+
|
|
|
132810 |
+ Ahh for the days of 1960s FORTRAN.
|
|
|
132810 |
+
|
|
|
132810 |
+ TODO: It might make more sense to implement an assembler based on
|
|
|
132810 |
+ the syntax used in official eBPF subsystem docs. */
|
|
|
132810 |
+
|
|
|
132810 |
+/* Possible assembly statement types include:
|
|
|
132810 |
+
|
|
|
132810 |
+ <stmt> ::= label, <dest=label>;
|
|
|
132810 |
+ <stmt> ::= <code=integer opcode>, <dest=reg>, <src1=reg>,
|
|
|
132810 |
+ <off/jmp_target=off>, <imm=imm>;
|
|
|
132810 |
+
|
|
|
132810 |
+ Possible argument types include:
|
|
|
132810 |
+
|
|
|
132810 |
+ <reg> ::= <register index> | r<register index> |
|
|
|
132810 |
+ $<identifier> | $<integer constant> | $$ | <string constant>
|
|
|
132810 |
+ <imm> ::= <integer constant> | BPF_MAXSTRINGLEN
|
|
|
132810 |
+ <off> ::= <imm> | <jump label>
|
|
|
132810 |
+
|
|
|
132810 |
+*/
|
|
|
132810 |
+
|
|
|
132810 |
+struct asm_stmt {
|
|
|
132810 |
+ std::string kind;
|
|
|
132810 |
+
|
|
|
132810 |
+ unsigned code;
|
|
|
132810 |
+ std::string dest, src1;
|
|
|
132810 |
+ int64_t off, imm;
|
|
|
132810 |
+
|
|
|
132810 |
+ // metadata for jmp instructions
|
|
|
132810 |
+ bool has_fallthrough = false;
|
|
|
132810 |
+ std::string jmp_target, fallthrough;
|
|
|
132810 |
+
|
|
|
132810 |
+ token *tok;
|
|
|
132810 |
+ bool deallocate_tok = false;
|
|
|
132810 |
+ ~asm_stmt() { if (deallocate_tok) delete tok; }
|
|
|
132810 |
+};
|
|
|
132810 |
+
|
|
|
132810 |
+std::ostream&
|
|
|
132810 |
+operator << (std::ostream& o, const asm_stmt& stmt)
|
|
|
132810 |
+{
|
|
|
132810 |
+ if (stmt.kind == "label")
|
|
|
132810 |
+ o << "label, " << stmt.dest << ";";
|
|
|
132810 |
+ else if (stmt.kind == "opcode")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ o << std::hex << stmt.code << ", "
|
|
|
132810 |
+ << stmt.dest << ", "
|
|
|
132810 |
+ << stmt.src1 << ", ";
|
|
|
132810 |
+ if (stmt.off != 0 || stmt.jmp_target == "")
|
|
|
132810 |
+ o << stmt.off;
|
|
|
132810 |
+ else if (stmt.off != 0) // && stmt.jmp_target != ""
|
|
|
132810 |
+ o << stmt.off << "/";
|
|
|
132810 |
+ if (stmt.jmp_target != "")
|
|
|
132810 |
+ o << "label:" << stmt.jmp_target;
|
|
|
132810 |
+ o << ", "
|
|
|
132810 |
+ << stmt.imm << ";"
|
|
|
132810 |
+ << (stmt.has_fallthrough ? " +FALLTHROUGH " + stmt.fallthrough : "");
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else
|
|
|
132810 |
+ o << "<unknown asm_stmt kind '" << stmt.kind << "'>";
|
|
|
132810 |
+ return o;
|
|
|
132810 |
+}
|
|
|
132810 |
+
|
|
|
132810 |
+bool
|
|
|
132810 |
+is_numeric (const std::string &str)
|
|
|
132810 |
+{
|
|
|
132810 |
+ size_t pos = 0;
|
|
|
132810 |
+ try {
|
|
|
132810 |
+ stol(str, &pos, 0);
|
|
|
132810 |
+ } catch (std::invalid_argument &e) {
|
|
|
132810 |
+ return false;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ return (pos == str.size());
|
|
|
132810 |
+}
|
|
|
132810 |
+
|
|
|
132810 |
+/* Parse an assembly statement starting from position start in code,
|
|
|
132810 |
+ then write the output in stmt. Returns a position immediately after
|
|
|
132810 |
+ the parsed statement. */
|
|
|
132810 |
+size_t
|
|
|
132810 |
+bpf_unparser::parse_asm_stmt (embeddedcode *s, size_t start,
|
|
|
132810 |
+ /*OUT*/asm_stmt &stmt)
|
|
|
132810 |
+{
|
|
|
132810 |
+ const interned_string &code = s->code;
|
|
|
132810 |
+
|
|
|
132810 |
+ retry:
|
|
|
132810 |
+ std::vector<std::string> args;
|
|
|
132810 |
+ unsigned n = code.size();
|
|
|
132810 |
+ bool in_comment = false;
|
|
|
132810 |
+ bool in_string = false;
|
|
|
132810 |
+
|
|
|
132810 |
+ // compute token with adjusted source location for diagnostics
|
|
|
132810 |
+ source_loc adjusted_loc; // TODO: ought to create a proper copy constructor for source_loc
|
|
|
132810 |
+ adjusted_loc.file = s->tok->location.file;
|
|
|
132810 |
+ adjusted_loc.line = s->tok->location.line;
|
|
|
132810 |
+ adjusted_loc.column = s->tok->location.column;
|
|
|
132810 |
+ for (size_t pos = 0; pos < start && pos < n; pos++)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // TODO: should save adjusted_loc state between parse_asm_stmt invocations; add field?
|
|
|
132810 |
+ char c = code[pos];
|
|
|
132810 |
+ if (c == '\n')
|
|
|
132810 |
+ {
|
|
|
132810 |
+ adjusted_loc.line++;
|
|
|
132810 |
+ adjusted_loc.column = 1;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else
|
|
|
132810 |
+ adjusted_loc.column++;
|
|
|
132810 |
+ }
|
|
|
132810 |
+
|
|
|
132810 |
+ // TODO: As before, parser is extremely non-rigorous and could do
|
|
|
132810 |
+ // with some tightening in terms of the inputs it accepts.
|
|
|
132810 |
+ size_t pos;
|
|
|
132810 |
+ std::string arg = "";
|
|
|
132810 |
+ for (pos = start; pos < n; pos++)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ char c = code[pos];
|
|
|
132810 |
+ char c2 = pos + 1 < n ? code [pos + 1] : 0;
|
|
|
132810 |
+ if (isspace(c))
|
|
|
132810 |
+ continue; // skip
|
|
|
132810 |
+ else if (in_comment)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ if (c == '*' && c2 == '/')
|
|
|
132810 |
+ ++pos, in_comment = false;
|
|
|
132810 |
+ // else skip
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (in_string)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // resulting string will be processed by translate_escapes()
|
|
|
132810 |
+ if (c == '"')
|
|
|
132810 |
+ arg.push_back(c), in_string = false; // include quote
|
|
|
132810 |
+ else if (c == '\\' && c2 == '"')
|
|
|
132810 |
+ ++pos, arg.push_back(c), arg.push_back(c2);
|
|
|
132810 |
+ else // accept any char, including whitespace
|
|
|
132810 |
+ arg.push_back(c);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (c == '/' && c2 == '*')
|
|
|
132810 |
+ ++pos, in_comment = true;
|
|
|
132810 |
+ else if (c == '"') // found a literal string
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // XXX: This allows '"' inside an arg and will treat the
|
|
|
132810 |
+ // string as a sequence of weird identifier characters. A
|
|
|
132810 |
+ // more rigorous parser would error on mixing strings and
|
|
|
132810 |
+ // regular chars.
|
|
|
132810 |
+ arg.push_back(c); // include quote
|
|
|
132810 |
+ in_string = true;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (c == ',') // reached end of argument
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // XXX: This strips out empty args. A more rigorous parser would error.
|
|
|
132810 |
+ if (arg != "")
|
|
|
132810 |
+ args.push_back(arg);
|
|
|
132810 |
+ arg = "";
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (c == ';') // reached end of statement
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // XXX: This strips out empty args. A more rigorous parser would error.
|
|
|
132810 |
+ if (arg != "")
|
|
|
132810 |
+ args.push_back(arg);
|
|
|
132810 |
+ arg = "";
|
|
|
132810 |
+ pos++; break;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else // found (we assume) a regular char
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // XXX: As before, this strips whitespace within args
|
|
|
132810 |
+ // (so '$ab', '$ a b' and '$a b' are equivalent).
|
|
|
132810 |
+ //
|
|
|
132810 |
+ // A more rigorous parser would track in_arg
|
|
|
132810 |
+ // and after_arg states and error on whitespace within args.
|
|
|
132810 |
+ arg.push_back(c);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ }
|
|
|
132810 |
+ // final ';' is optional, so we watch for a trailing arg:
|
|
|
132810 |
+ if (arg != "") args.push_back(arg);
|
|
|
132810 |
+
|
|
|
132810 |
+ // handle the case with no args
|
|
|
132810 |
+ if (args.empty() && pos >= n)
|
|
|
132810 |
+ return std::string::npos; // finished parsing
|
|
|
132810 |
+ else if (args.empty())
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // XXX: This skips an empty statement.
|
|
|
132810 |
+ // A more rigorous parser would error.
|
|
|
132810 |
+ start = pos;
|
|
|
132810 |
+ goto retry;
|
|
|
132810 |
+ }
|
|
|
132810 |
+
|
|
|
132810 |
+ // set token with adjusted source location
|
|
|
132810 |
+ //stmt.tok = (token *)s->tok;
|
|
|
132810 |
+ // TODO this segfaults for some reason, some data not copied?
|
|
|
132810 |
+ stmt.tok = s->tok->adjust_location(adjusted_loc);
|
|
|
132810 |
+ stmt.deallocate_tok = false; // TODO must avoid destroy-on-copy
|
|
|
132810 |
+
|
|
|
132810 |
+ std::cerr << "DEBUG GOT stmt "; // TODO
|
|
|
132810 |
+ for (unsigned k = 0; k < args.size(); k++) std::cerr << args[k] << " / ";
|
|
|
132810 |
+ std::cerr << std::endl; // TODO
|
|
|
132810 |
+ if (args[0] == "label")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ if (args.size() != 2)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_("invalid bpf embeddedcode syntax"), stmt.tok);
|
|
|
132810 |
+ stmt.kind = args[0];
|
|
|
132810 |
+ stmt.dest = args[1];
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (is_numeric(args[0]))
|
|
|
132810 |
+ {
|
|
|
132810 |
+ if (args.size() != 5) // TODO change to 4 to test err+tok
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_("invalid bpf embeddedcode syntax"), stmt.tok);
|
|
|
132810 |
+ stmt.kind = "opcode";
|
|
|
132810 |
+ stmt.code = stoul(args[0], 0, 0); // TODO signal error
|
|
|
132810 |
+ stmt.dest = args[1];
|
|
|
132810 |
+ stmt.src1 = args[2];
|
|
|
132810 |
+
|
|
|
132810 |
+ bool has_jmp_target =
|
|
|
132810 |
+ BPF_CLASS(stmt.code) == BPF_JMP
|
|
|
132810 |
+ && BPF_OP(stmt.code) != BPF_EXIT
|
|
|
132810 |
+ && BPF_OP(stmt.code) != BPF_CALL;
|
|
|
132810 |
+ stmt.has_fallthrough = // only for jcond
|
|
|
132810 |
+ has_jmp_target
|
|
|
132810 |
+ && BPF_OP(stmt.code) != BPF_JA;
|
|
|
132810 |
+ // XXX: stmt.fallthrough is computed by visit_embeddedcode
|
|
|
132810 |
+
|
|
|
132810 |
+ if (has_jmp_target)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ stmt.off = 0;
|
|
|
132810 |
+ stmt.jmp_target = args[3];
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (args[3] == "BPF_MAXSTRINGLEN")
|
|
|
132810 |
+ stmt.off = BPF_MAXSTRINGLEN;
|
|
|
132810 |
+ else if (args[3] == "-")
|
|
|
132810 |
+ stmt.off = 0;
|
|
|
132810 |
+ else
|
|
|
132810 |
+ stmt.off = stol(args[3]); // TODO signal error
|
|
|
132810 |
+
|
|
|
132810 |
+ if (args[4] == "BPF_MAXSTRINGLEN")
|
|
|
132810 |
+ stmt.imm = BPF_MAXSTRINGLEN;
|
|
|
132810 |
+ else if (args[4] == "-")
|
|
|
132810 |
+ stmt.imm = 0;
|
|
|
132810 |
+ else
|
|
|
132810 |
+ stmt.imm = stol(args[4]); // TODO signal error
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("unknown bpf embeddedcode operator '%s'",
|
|
|
132810 |
+ args[0].c_str()), stmt.tok);
|
|
|
132810 |
+
|
|
|
132810 |
+ // we returned a statement, so there's more parsing to be done
|
|
|
132810 |
+ return pos;
|
|
|
132810 |
+}
|
|
|
132810 |
+
|
|
|
132810 |
+/* forward declaration */
|
|
|
132810 |
+std::string translate_escapes (const interned_string &str);
|
|
|
132810 |
+
|
|
|
132810 |
+/* Convert a <reg> or <imm> operand to a value.
|
|
|
132810 |
+ May emit code to store a string constant on the stack. */
|
|
|
132810 |
value *
|
|
|
132810 |
-bpf_unparser::parse_reg(const std::string &str, embeddedcode *s)
|
|
|
132810 |
+bpf_unparser::emit_asm_arg (const asm_stmt &stmt, const std::string &arg,
|
|
|
132810 |
+ bool allow_imm)
|
|
|
132810 |
{
|
|
|
132810 |
- if (str == "$$")
|
|
|
132810 |
+ if (arg == "$$")
|
|
|
132810 |
{
|
|
|
132810 |
- if (func_return.empty ())
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("no return value outside function"), s->tok);
|
|
|
132810 |
+ /* arg is a return value */
|
|
|
132810 |
+ if (func_return.empty())
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_("no return value outside function"), stmt.tok);
|
|
|
132810 |
return func_return_val.back();
|
|
|
132810 |
}
|
|
|
132810 |
- else if (str[0] == '$')
|
|
|
132810 |
+ else if (arg[0] == '$')
|
|
|
132810 |
{
|
|
|
132810 |
- std::string var = str.substr(1);
|
|
|
132810 |
+ /* assume arg is a variable */
|
|
|
132810 |
+ std::string var = arg.substr(1);
|
|
|
132810 |
for (auto i = this_locals->begin(); i != this_locals->end(); ++i)
|
|
|
132810 |
{
|
|
|
132810 |
vardecl *v = i->first;
|
|
|
132810 |
if (var == v->unmangled_name)
|
|
|
132810 |
return i->second;
|
|
|
132810 |
}
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("unknown variable"), s->tok);
|
|
|
132810 |
+
|
|
|
132810 |
+ /* if it's an unknown variable, allocate a temporary */
|
|
|
132810 |
+ struct vardecl *vd = new vardecl;
|
|
|
132810 |
+ vd->name = "__bpfasm__local_" + var;
|
|
|
132810 |
+ vd->unmangled_name = var;
|
|
|
132810 |
+ vd->type = pe_long;
|
|
|
132810 |
+ vd->arity = 0;
|
|
|
132810 |
+ value *reg = this_prog.new_reg();
|
|
|
132810 |
+ const locals_map::value_type v (vd, reg);
|
|
|
132810 |
+ auto ok = this_locals->insert (v);
|
|
|
132810 |
+ assert (ok.second);
|
|
|
132810 |
+ return reg;
|
|
|
132810 |
+ // TODO write a testcase
|
|
|
132810 |
}
|
|
|
132810 |
- else
|
|
|
132810 |
+ else if (is_numeric(arg) && allow_imm)
|
|
|
132810 |
{
|
|
|
132810 |
- unsigned long num = stoul(str, 0, 0);
|
|
|
132810 |
+ /* arg is an immediate constant */
|
|
|
132810 |
+ long imm = stol(arg, 0, 0);
|
|
|
132810 |
+ return this_prog.new_imm(imm);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (is_numeric(arg) || arg[0] == 'r')
|
|
|
132810 |
+ {
|
|
|
132810 |
+ /* arg is a register number */
|
|
|
132810 |
+ std::string reg = arg[0] == 'r' ? arg.substr(1) : arg;
|
|
|
132810 |
+ unsigned long num = stoul(reg, 0, 0);
|
|
|
132810 |
if (num > 10)
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid bpf register"), s->tok);
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
|
|
|
132810 |
+ arg.c_str()), stmt.tok);
|
|
|
132810 |
return this_prog.lookup_reg(num);
|
|
|
132810 |
}
|
|
|
132810 |
+ else if (arg[0] == '"')
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // TODO verify correctness
|
|
|
132810 |
+ /* arg is a string constant */
|
|
|
132810 |
+ if (arg[arg.size() - 1] != '"')
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("BUG: improper string %s",
|
|
|
132810 |
+ arg.c_str()), stmt.tok);
|
|
|
132810 |
+ std::string escaped_str = arg.substr(1,arg.size()-2); /* strip quotes */
|
|
|
132810 |
+ std::string str = translate_escapes(escaped_str); // TODO interned_str?
|
|
|
132810 |
+ return emit_literal_string(str, stmt.tok);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (arg == "BPF_MAXSTRINGLEN")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ /* arg is BPF_MAXSTRINGLEN */
|
|
|
132810 |
+ if (!allow_imm)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
|
|
|
132810 |
+ arg.c_str()), stmt.tok);
|
|
|
132810 |
+ return this_prog.new_imm(BPF_MAXSTRINGLEN);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (arg == "-")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ /* arg is null a.k.a '0' */
|
|
|
132810 |
+ if (!allow_imm)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
|
|
|
132810 |
+ arg.c_str()), stmt.tok);
|
|
|
132810 |
+ return this_prog.new_imm(0);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (allow_imm)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("invalid bpf argument '%s'",
|
|
|
132810 |
+ arg.c_str()), stmt.tok);
|
|
|
132810 |
+ else
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
|
|
|
132810 |
+ arg.c_str()), stmt.tok);
|
|
|
132810 |
+
|
|
|
132810 |
+}
|
|
|
132810 |
+
|
|
|
132810 |
+value *
|
|
|
132810 |
+bpf_unparser::emit_asm_reg (const asm_stmt &stmt, const std::string &reg)
|
|
|
132810 |
+{
|
|
|
132810 |
+ return emit_asm_arg(stmt, reg, /*allow_imm=*/false);
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
void
|
|
|
132810 |
-bpf_unparser::visit_embeddedcode (embeddedcode *s)
|
|
|
132810 |
+bpf_unparser::emit_asm_opcode (const asm_stmt &stmt,
|
|
|
132810 |
+ std::map<std::string, block *> label_map)
|
|
|
132810 |
{
|
|
|
132810 |
- std::string strip;
|
|
|
132810 |
- {
|
|
|
132810 |
- const interned_string &code = s->code;
|
|
|
132810 |
- unsigned n = code.size();
|
|
|
132810 |
- bool in_comment = false;
|
|
|
132810 |
+ if (stmt.code > 0xff && stmt.code != BPF_LD_MAP)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_("invalid bpf code"), stmt.tok);
|
|
|
132810 |
|
|
|
132810 |
- for (unsigned i = 0; i < n; ++i)
|
|
|
132810 |
- {
|
|
|
132810 |
- char c = code[i];
|
|
|
132810 |
- if (isspace(c))
|
|
|
132810 |
- continue;
|
|
|
132810 |
- if (in_comment)
|
|
|
132810 |
- {
|
|
|
132810 |
- if (c == '*' && code[i + 1] == '/')
|
|
|
132810 |
- ++i, in_comment = false;
|
|
|
132810 |
- }
|
|
|
132810 |
- else if (c == '/' && code[i + 1] == '*')
|
|
|
132810 |
- ++i, in_comment = true;
|
|
|
132810 |
- else
|
|
|
132810 |
- strip += c;
|
|
|
132810 |
- }
|
|
|
132810 |
- }
|
|
|
132810 |
+ bool r_dest = false, r_src0 = false, r_src1 = false, i_src1 = false;
|
|
|
132810 |
+ bool op_jmp = false, op_jcond = false; condition c;
|
|
|
132810 |
+ switch (BPF_CLASS (stmt.code))
|
|
|
132810 |
+ {
|
|
|
132810 |
+ case BPF_LDX:
|
|
|
132810 |
+ r_dest = r_src1 = true;
|
|
|
132810 |
+ break;
|
|
|
132810 |
+ case BPF_STX:
|
|
|
132810 |
+ r_src0 = r_src1 = true;
|
|
|
132810 |
+ break;
|
|
|
132810 |
+ case BPF_ST:
|
|
|
132810 |
+ r_src0 = i_src1 = true;
|
|
|
132810 |
+ break;
|
|
|
132810 |
+
|
|
|
132810 |
+ case BPF_ALU:
|
|
|
132810 |
+ case BPF_ALU64:
|
|
|
132810 |
+ r_dest = true;
|
|
|
132810 |
+ if (stmt.code & BPF_X)
|
|
|
132810 |
+ r_src1 = true;
|
|
|
132810 |
+ else
|
|
|
132810 |
+ i_src1 = true;
|
|
|
132810 |
+ switch (BPF_OP (stmt.code))
|
|
|
132810 |
+ {
|
|
|
132810 |
+ case BPF_NEG:
|
|
|
132810 |
+ case BPF_MOV:
|
|
|
132810 |
+ break;
|
|
|
132810 |
+ case BPF_END:
|
|
|
132810 |
+ /* X/K bit repurposed as LE/BE. */
|
|
|
132810 |
+ i_src1 = false, r_src1 = true;
|
|
|
132810 |
+ break;
|
|
|
132810 |
+ default:
|
|
|
132810 |
+ r_src0 = true;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ break;
|
|
|
132810 |
+
|
|
|
132810 |
+ case BPF_JMP:
|
|
|
132810 |
+ switch (BPF_OP (stmt.code))
|
|
|
132810 |
+ {
|
|
|
132810 |
+ case BPF_EXIT:
|
|
|
132810 |
+ // no special treatment needed
|
|
|
132810 |
+ break;
|
|
|
132810 |
+ case BPF_CALL:
|
|
|
132810 |
+ i_src1 = true;
|
|
|
132810 |
+ break;
|
|
|
132810 |
+ case BPF_JA:
|
|
|
132810 |
+ op_jmp = true;
|
|
|
132810 |
+ break;
|
|
|
132810 |
+ default:
|
|
|
132810 |
+ // XXX: assume this is a jcond op
|
|
|
132810 |
+ op_jcond = true;
|
|
|
132810 |
+ r_src0 = true;
|
|
|
132810 |
+ if (stmt.code & BPF_X)
|
|
|
132810 |
+ r_src1 = true;
|
|
|
132810 |
+ else
|
|
|
132810 |
+ i_src1 = true;
|
|
|
132810 |
+ }
|
|
|
132810 |
+
|
|
|
132810 |
+ // compute jump condition c
|
|
|
132810 |
+ switch (BPF_OP (stmt.code))
|
|
|
132810 |
+ {
|
|
|
132810 |
+ case BPF_JEQ: c = EQ; break;
|
|
|
132810 |
+ case BPF_JNE: c = NE; break;
|
|
|
132810 |
+ case BPF_JGT: c = GTU; break;
|
|
|
132810 |
+ case BPF_JGE: c = GEU; break;
|
|
|
132810 |
+ case BPF_JLT: c = LTU; break;
|
|
|
132810 |
+ case BPF_JLE: c = LEU; break;
|
|
|
132810 |
+ case BPF_JSGT: c = GT; break;
|
|
|
132810 |
+ case BPF_JSGE: c = GE; break;
|
|
|
132810 |
+ case BPF_JSLT: c = LT; break;
|
|
|
132810 |
+ case BPF_JSLE: c = LE; break;
|
|
|
132810 |
+ case BPF_JSET: c = TEST; break;
|
|
|
132810 |
+ default:
|
|
|
132810 |
+ if (op_jcond)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_("invalid branch in bpf code"), stmt.tok);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ break;
|
|
|
132810 |
+
|
|
|
132810 |
+ default:
|
|
|
132810 |
+ if (stmt.code == BPF_LD_MAP)
|
|
|
132810 |
+ r_dest = true, i_src1 = true;
|
|
|
132810 |
+ else
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("unknown opcode '%d' in bpf code",
|
|
|
132810 |
+ stmt.code), stmt.tok);
|
|
|
132810 |
+ }
|
|
|
132810 |
|
|
|
132810 |
- std::istringstream ii (strip);
|
|
|
132810 |
- ii >> std::setbase(0);
|
|
|
132810 |
+ value *v_dest = NULL;
|
|
|
132810 |
+ if (r_dest || r_src0)
|
|
|
132810 |
+ v_dest = emit_asm_reg(stmt, stmt.dest);
|
|
|
132810 |
+ else if (stmt.dest != "0" && stmt.dest != "-")
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("invalid register field '%s' in bpf code",
|
|
|
132810 |
+ stmt.dest.c_str()), stmt.tok);
|
|
|
132810 |
|
|
|
132810 |
- while (true)
|
|
|
132810 |
+ value *v_src1 = NULL;
|
|
|
132810 |
+ if (r_src1)
|
|
|
132810 |
+ v_src1 = emit_asm_reg(stmt, stmt.src1);
|
|
|
132810 |
+ else
|
|
|
132810 |
{
|
|
|
132810 |
- unsigned code;
|
|
|
132810 |
- char s1, s2, s3, s4;
|
|
|
132810 |
- char dest_b[256], src1_b[256];
|
|
|
132810 |
- int64_t off, imm;
|
|
|
132810 |
+ if (stmt.src1 != "0" && stmt.src1 != "-")
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("invalid register field '%s' in bpf code",
|
|
|
132810 |
+ stmt.src1.c_str()), stmt.tok);
|
|
|
132810 |
+ if (i_src1)
|
|
|
132810 |
+ v_src1 = this_prog.new_imm(stmt.imm);
|
|
|
132810 |
+ else if (stmt.imm != 0)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_("invalid immediate field in bpf code"), stmt.tok);
|
|
|
132810 |
+ }
|
|
|
132810 |
|
|
|
132810 |
- ii >> code >> s1;
|
|
|
132810 |
- ii.get(dest_b, sizeof(dest_b), ',') >> s2;
|
|
|
132810 |
- ii.get(src1_b, sizeof(src1_b), ',') >> s3;
|
|
|
132810 |
- ii >> off >> s4 >> imm;
|
|
|
132810 |
+ if (stmt.off != (int16_t)stmt.off)
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("offset field '%ld' out of range in bpf code", stmt.off), stmt.tok);
|
|
|
132810 |
|
|
|
132810 |
- if (ii.fail() || s1 != ',' || s2 != ',' || s3 != ',' || s4 != ',')
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid bpf embeddedcode syntax"), s->tok);
|
|
|
132810 |
+ if (op_jmp)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ block *target = label_map[stmt.jmp_target];
|
|
|
132810 |
+ this_prog.mk_jmp(this_ins, target);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (op_jcond)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ if (label_map.count(stmt.jmp_target) == 0)
|
|
|
132810 |
+ throw SEMANTIC_ERROR(_F("undefined jump target '%s' in bpf code",
|
|
|
132810 |
+ stmt.jmp_target.c_str()), stmt.tok);
|
|
|
132810 |
+ if (label_map.count(stmt.fallthrough) == 0)
|
|
|
132810 |
+ throw SEMANTIC_ERROR(_F("BUG: undefined fallthrough target '%s'",
|
|
|
132810 |
+ stmt.fallthrough.c_str()), stmt.tok);
|
|
|
132810 |
+ block *target = label_map[stmt.jmp_target];
|
|
|
132810 |
+ block *fallthrough = label_map[stmt.fallthrough];
|
|
|
132810 |
+ this_prog.mk_jcond(this_ins, c, v_dest, v_src1, target, fallthrough);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else // regular opcode
|
|
|
132810 |
+ {
|
|
|
132810 |
+ insn *i = this_ins.new_insn();
|
|
|
132810 |
+ i->code = stmt.code;
|
|
|
132810 |
+ i->dest = (r_dest ? v_dest : NULL);
|
|
|
132810 |
+ i->src0 = (r_src0 ? v_dest : NULL);
|
|
|
132810 |
+ i->src1 = v_src1;
|
|
|
132810 |
+ i->off = stmt.off;
|
|
|
132810 |
+ }
|
|
|
132810 |
+}
|
|
|
132810 |
|
|
|
132810 |
- if (code > 0xff && code != BPF_LD_MAP)
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid bpf code"), s->tok);
|
|
|
132810 |
+void
|
|
|
132810 |
+bpf_unparser::visit_embeddedcode (embeddedcode *s)
|
|
|
132810 |
+{
|
|
|
132810 |
+ std::vector<asm_stmt> statements;
|
|
|
132810 |
+ asm_stmt stmt;
|
|
|
132810 |
|
|
|
132810 |
- bool r_dest = false, r_src0 = false, r_src1 = false, i_src1 = false;
|
|
|
132810 |
- switch (BPF_CLASS (code))
|
|
|
132810 |
- {
|
|
|
132810 |
- case BPF_LDX:
|
|
|
132810 |
- r_dest = r_src1 = true;
|
|
|
132810 |
- break;
|
|
|
132810 |
- case BPF_STX:
|
|
|
132810 |
- r_src0 = r_src1 = true;
|
|
|
132810 |
- break;
|
|
|
132810 |
- case BPF_ST:
|
|
|
132810 |
- r_src0 = i_src1 = true;
|
|
|
132810 |
- break;
|
|
|
132810 |
+ size_t pos = 0;
|
|
|
132810 |
+ while ((pos = parse_asm_stmt(s, pos, stmt)) != std::string::npos)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ statements.push_back(stmt);
|
|
|
132810 |
+ }
|
|
|
132810 |
|
|
|
132810 |
- case BPF_ALU:
|
|
|
132810 |
- case BPF_ALU64:
|
|
|
132810 |
- r_dest = true;
|
|
|
132810 |
- if (code & BPF_X)
|
|
|
132810 |
- r_src1 = true;
|
|
|
132810 |
- else
|
|
|
132810 |
- i_src1 = true;
|
|
|
132810 |
- switch (BPF_OP (code))
|
|
|
132810 |
- {
|
|
|
132810 |
- case BPF_NEG:
|
|
|
132810 |
- case BPF_MOV:
|
|
|
132810 |
- break;
|
|
|
132810 |
- case BPF_END:
|
|
|
132810 |
- /* X/K bit repurposed as LE/BE. */
|
|
|
132810 |
- i_src1 = false, r_src1 = true;
|
|
|
132810 |
- break;
|
|
|
132810 |
- default:
|
|
|
132810 |
- r_src0 = true;
|
|
|
132810 |
- }
|
|
|
132810 |
- break;
|
|
|
132810 |
+ // build basic block table
|
|
|
132810 |
+ std::map<std::string, block *> label_map;
|
|
|
132810 |
+ block *entry_block = this_ins.b;
|
|
|
132810 |
+ label_map[";;entry"] = entry_block;
|
|
|
132810 |
|
|
|
132810 |
- case BPF_JMP:
|
|
|
132810 |
- switch (BPF_OP (code))
|
|
|
132810 |
- {
|
|
|
132810 |
- case BPF_EXIT:
|
|
|
132810 |
- break;
|
|
|
132810 |
- case BPF_CALL:
|
|
|
132810 |
- i_src1 = true;
|
|
|
132810 |
- break;
|
|
|
132810 |
- default:
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid branch in bpf code"), s->tok);
|
|
|
132810 |
- }
|
|
|
132810 |
- break;
|
|
|
132810 |
+ bool after_label = true;
|
|
|
132810 |
+ asm_stmt *after_jump = NULL;
|
|
|
132810 |
+ unsigned fallthrough_count = 0;
|
|
|
132810 |
+ for (std::vector<asm_stmt>::iterator it = statements.begin();
|
|
|
132810 |
+ it != statements.end(); it++)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ stmt = *it;
|
|
|
132810 |
|
|
|
132810 |
- default:
|
|
|
132810 |
- if (code == BPF_LD_MAP)
|
|
|
132810 |
- r_dest = true, i_src1 = true;
|
|
|
132810 |
- else
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("unknown opcode in bpf code"), s->tok);
|
|
|
132810 |
- }
|
|
|
132810 |
+ if (after_jump != NULL && stmt.kind == "label")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ after_jump->fallthrough = stmt.dest;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (after_jump != NULL)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ block *b = this_prog.new_block();
|
|
|
132810 |
|
|
|
132810 |
- std::string dest(dest_b);
|
|
|
132810 |
- value *v_dest = NULL;
|
|
|
132810 |
- if (r_dest || r_src0)
|
|
|
132810 |
- v_dest = parse_reg(dest, s);
|
|
|
132810 |
- else if (dest != "0")
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid register field in bpf code"), s->tok);
|
|
|
132810 |
-
|
|
|
132810 |
- std::string src1(src1_b);
|
|
|
132810 |
- value *v_src1 = NULL;
|
|
|
132810 |
- if (r_src1)
|
|
|
132810 |
- v_src1 = parse_reg(src1, s);
|
|
|
132810 |
- else
|
|
|
132810 |
- {
|
|
|
132810 |
- if (src1 != "0")
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid register field in bpf code"), s->tok);
|
|
|
132810 |
- if (i_src1)
|
|
|
132810 |
- v_src1 = this_prog.new_imm(imm);
|
|
|
132810 |
- else if (imm != 0)
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid immediate field in bpf code"), s->tok);
|
|
|
132810 |
- }
|
|
|
132810 |
+ // generate unique label for fallthrough edge
|
|
|
132810 |
+ std::ostringstream oss;
|
|
|
132810 |
+ oss << "fallthrough;;" << fallthrough_count++;
|
|
|
132810 |
+ std::string fallthrough_label = oss.str();
|
|
|
132810 |
+ // XXX: semicolons prevent collision with programmer-defined labels
|
|
|
132810 |
|
|
|
132810 |
- if (off != (int16_t)off)
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("offset field out of range in bpf code"), s->tok);
|
|
|
132810 |
+ label_map[fallthrough_label] = b;
|
|
|
132810 |
+ set_block(b);
|
|
|
132810 |
|
|
|
132810 |
- insn *i = this_ins.new_insn();
|
|
|
132810 |
- i->code = code;
|
|
|
132810 |
- i->dest = (r_dest ? v_dest : NULL);
|
|
|
132810 |
- i->src0 = (r_src0 ? v_dest : NULL);
|
|
|
132810 |
- i->src1 = v_src1;
|
|
|
132810 |
- i->off = off;
|
|
|
132810 |
+ after_jump->fallthrough = fallthrough_label;
|
|
|
132810 |
+ }
|
|
|
132810 |
|
|
|
132810 |
- ii >> s1;
|
|
|
132810 |
- if (ii.eof())
|
|
|
132810 |
- break;
|
|
|
132810 |
- if (s1 != ';')
|
|
|
132810 |
- throw SEMANTIC_ERROR (_("invalid bpf embeddedcode syntax"), s->tok);
|
|
|
132810 |
+ if (stmt.kind == "label" && after_label)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // avoid creating multiple blocks for consecutive labels
|
|
|
132810 |
+ label_map[stmt.dest] = this_ins.b;
|
|
|
132810 |
+ after_jump = NULL;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (stmt.kind == "label")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ block *b = this_prog.new_block();
|
|
|
132810 |
+ label_map[stmt.dest] = b;
|
|
|
132810 |
+ set_block(b);
|
|
|
132810 |
+ after_label = true;
|
|
|
132810 |
+ after_jump = NULL;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (stmt.has_fallthrough)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ after_label = false;
|
|
|
132810 |
+ after_jump = &*it; // be sure to refer to original, not copied stmt
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else
|
|
|
132810 |
+ {
|
|
|
132810 |
+ after_label = false;
|
|
|
132810 |
+ after_jump = NULL;
|
|
|
132810 |
+ }
|
|
|
132810 |
+ }
|
|
|
132810 |
+ if (after_jump != NULL) // TODO: should just fall through to exit
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_("BUG: bpf embeddedcode doesn't support "
|
|
|
132810 |
+ "fallthrough on final asm_stmt"), stmt.tok);
|
|
|
132810 |
+
|
|
|
132810 |
+ // emit statements
|
|
|
132810 |
+ bool jumped_already = true;
|
|
|
132810 |
+ set_block(entry_block);
|
|
|
132810 |
+ for (std::vector<asm_stmt>::iterator it = statements.begin();
|
|
|
132810 |
+ it != statements.end(); it++)
|
|
|
132810 |
+ {
|
|
|
132810 |
+ stmt = *it;
|
|
|
132810 |
+ std::cerr << "DEBUG processing " << stmt << std::endl; // TODO
|
|
|
132810 |
+ if (stmt.kind == "label")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ // TODO: be sure there's no gap in the edge
|
|
|
132810 |
+ if (!jumped_already)
|
|
|
132810 |
+ emit_jmp (label_map[stmt.dest]);
|
|
|
132810 |
+ set_block(label_map[stmt.dest]);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else if (stmt.kind == "opcode")
|
|
|
132810 |
+ {
|
|
|
132810 |
+ emit_asm_opcode (stmt, label_map);
|
|
|
132810 |
+ }
|
|
|
132810 |
+ else
|
|
|
132810 |
+ throw SEMANTIC_ERROR (_F("BUG: bpf embeddedcode contains unexpected "
|
|
|
132810 |
+ "asm_stmt kind '%s'", stmt.kind.c_str()),
|
|
|
132810 |
+ stmt.tok);
|
|
|
132810 |
+ jumped_already = stmt.has_fallthrough;
|
|
|
132810 |
+ if (stmt.has_fallthrough)
|
|
|
132810 |
+ set_block(label_map[stmt.fallthrough]);
|
|
|
132810 |
}
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
@@ -1016,8 +1461,13 @@ bpf_unparser::visit_delete_statement (delete_statement *s)
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
// Translate string escape characters.
|
|
|
132810 |
+// Accepts strings produced by parse.cxx lexer::scan and
|
|
|
132810 |
+// by the eBPF embedded-code assembler.
|
|
|
132810 |
+//
|
|
|
132810 |
+// PR23559: This is currently an eBPF-only version of the function
|
|
|
132810 |
+// that does not translate octal escapes.
|
|
|
132810 |
std::string
|
|
|
132810 |
-translate_escapes (interned_string &str)
|
|
|
132810 |
+translate_escapes (const interned_string &str)
|
|
|
132810 |
{
|
|
|
132810 |
std::string result;
|
|
|
132810 |
bool saw_esc = false;
|
|
|
132810 |
@@ -1045,16 +1495,21 @@ translate_escapes (interned_string &str)
|
|
|
132810 |
return result;
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
+value *
|
|
|
132810 |
+bpf_unparser::emit_literal_string (const std::string &str, const token *tok)
|
|
|
132810 |
+{ // Length-checks str, then returns the value created by this_prog.new_str(str).
|
|
|
132810 |
+ size_t str_bytes = str.size() + 1; // one byte more than the text; presumably the terminating NUL -- TODO confirm
|
|
|
132810 |
+ if (str_bytes > BPF_MAXSTRINGLEN) // the limit is applied to size()+1, not to size()
|
|
|
132810 |
+ throw SEMANTIC_ERROR(_("string literal too long"), tok); // tok is passed along with the error
|
|
|
132810 |
+ return this_prog.new_str(str); // will be lowered to a pointer by bpf-opt.cxx
|
|
|
132810 |
+}
|
|
|
132810 |
+
|
|
|
132810 |
void
|
|
|
132810 |
bpf_unparser::visit_literal_string (literal_string* e)
|
|
|
132810 |
{
|
|
|
132810 |
interned_string v = e->value;
|
|
|
132810 |
std::string str = translate_escapes(v);
|
|
|
132810 |
-
|
|
|
132810 |
- size_t str_bytes = str.size() + 1;
|
|
|
132810 |
- if (str_bytes > BPF_MAXSTRINGLEN)
|
|
|
132810 |
- throw SEMANTIC_ERROR(_("String literal too long"), e->tok);
|
|
|
132810 |
- result = this_prog.new_str(str); // will be lowered to a pointer by bpf-opt.cxx
|
|
|
132810 |
+ result = emit_literal_string(str, e->tok); // BPF_MAXSTRINGLEN check and this_prog.new_str() are done inside emit_literal_string
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
void
|
|
|
132810 |
@@ -1783,7 +2238,7 @@ bpf_unparser::visit_target_register (target_register* e)
|
|
|
132810 |
// ??? Could use 8-byte chunks if we're starved for instruction count.
|
|
|
132810 |
// ??? Endianness of the target comes into play here.
|
|
|
132810 |
value *
|
|
|
132810 |
-emit_literal_str(program &this_prog, insn_inserter &this_ins,
|
|
|
132810 |
+emit_simple_literal_str(program &this_prog, insn_inserter &this_ins,
|
|
|
132810 |
value *dest, int ofs, std::string &src, bool zero_pad)
|
|
|
132810 |
{
|
|
|
132810 |
size_t str_bytes = src.size() + 1;
|
|
|
132810 |
@@ -1835,15 +2290,15 @@ emit_literal_str(program &this_prog, insn_inserter &this_ins,
|
|
|
132810 |
// ??? Could use 8-byte chunks if we're starved for instruction count.
|
|
|
132810 |
// ??? Endianness of the target may come into play here.
|
|
|
132810 |
value *
|
|
|
132810 |
-bpf_unparser::emit_copied_str(value *dest, int ofs, value *src, bool zero_pad)
|
|
|
132810 |
+bpf_unparser::emit_string_copy(value *dest, int ofs, value *src, bool zero_pad)
|
|
|
132810 |
{
|
|
|
132810 |
if (src->is_str())
|
|
|
132810 |
{
|
|
|
132810 |
/* If src is a string literal, its exact length is known and
|
|
|
132810 |
we can emit simpler, unconditional string copying code. */
|
|
|
132810 |
std::string str = src->str();
|
|
|
132810 |
- return emit_literal_str(this_prog, this_ins,
|
|
|
132810 |
- dest, ofs, str, zero_pad);
|
|
|
132810 |
+ return emit_simple_literal_str(this_prog, this_ins,
|
|
|
132810 |
+ dest, ofs, str, zero_pad);
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
size_t str_bytes = BPF_MAXSTRINGLEN;
|
|
|
132810 |
@@ -1931,7 +2386,7 @@ bpf_unparser::emit_copied_str(value *dest, int ofs, value *src, bool zero_pad)
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
// XXX: Zero-padding is only used under specific circumstances;
|
|
|
132810 |
- // see the corresponding comment in emit_literal_str().
|
|
|
132810 |
+ // see the corresponding comment in emit_simple_literal_str().
|
|
|
132810 |
if (zero_pad)
|
|
|
132810 |
{
|
|
|
132810 |
for (unsigned i = 0; i < str_words; ++i)
|
|
|
132810 |
@@ -1977,7 +2432,7 @@ void
|
|
|
132810 |
bpf_unparser::emit_str_arg(value *arg, int ofs, value *str)
|
|
|
132810 |
{
|
|
|
132810 |
value *frame = this_prog.lookup_reg(BPF_REG_10);
|
|
|
132810 |
- value *out = emit_copied_str(frame, ofs, str, true /* zero pad */);
|
|
|
132810 |
+ value *out = emit_string_copy(frame, ofs, str, true /* zero pad */);
|
|
|
132810 |
emit_mov(arg, out);
|
|
|
132810 |
}
|
|
|
132810 |
|
|
|
132810 |
diff --git a/parse.h b/parse.h
|
|
|
132810 |
index 42b0bc5fd..96aef0394 100644
|
|
|
132810 |
--- a/parse.h
|
|
|
132810 |
+++ b/parse.h
|
|
|
132810 |
@@ -65,11 +65,25 @@ struct token
|
|
|
132810 |
token_junk_type junk_type;
|
|
|
132810 |
|
|
|
132810 |
std::string junk_message(systemtap_session& session) const;
|
|
|
132810 |
+
|
|
|
132810 |
+ // Returns a newly allocated token that copies content, chain, type and junk_type from this one but takes its location from adjusted_loc.
|
|
|
132810 |
+ // Can be used for exact error reporting *within* a token, e.g. inside embedded code.
|
|
|
132810 |
+ token *adjust_location(const source_loc &adjusted_loc) const
|
|
|
132810 |
+ { // TODO split from header
|
|
|
132810 |
+ token *new_tok = new token; // allocated with new and never freed here; presumably the caller owns it -- TODO confirm
|
|
|
132810 |
+ new_tok->location = adjusted_loc; // the only field not taken from this token
|
|
|
132810 |
+ new_tok->content = content;
|
|
|
132810 |
+ new_tok->chain = chain; // same chain pointer as this token (pointer copy, not a deep copy)
|
|
|
132810 |
+ new_tok->type = type;
|
|
|
132810 |
+ new_tok->junk_type = junk_type;
|
|
|
132810 |
+ return new_tok;
|
|
|
132810 |
+ }
|
|
|
132810 |
|
|
|
132810 |
friend class parser;
|
|
|
132810 |
friend class lexer;
|
|
|
132810 |
private:
|
|
|
132810 |
void make_junk (token_junk_type);
|
|
|
132810 |
+
|
|
|
132810 |
token(): chain(0), type(tok_junk), junk_type(tok_junk_unknown) {}
|
|
|
132810 |
token(const token& other):
|
|
|
132810 |
location(other.location), content(other.content),
|
|
|
132810 |
--
|
|
|
132810 |
2.14.5
|
|
|
132810 |
|