Blame SOURCES/rhbz1643997.0005-stapbpf-assembler-WIP-4-alloc-and-helper-call-operat.patch

e4e640
From f2339483ab00eff7a1a45c33b968891a63668c98 Mon Sep 17 00:00:00 2001
e4e640
From: Serhei Makarov <smakarov@redhat.com>
e4e640
Date: Tue, 16 Oct 2018 18:16:48 -0400
e4e640
Subject: [PATCH 05/32] stapbpf assembler WIP #4 :: alloc and (helper) call
e4e640
 operations
e4e640
e4e640
---
e4e640
 bpf-base.cxx       |  46 ++++++----
e4e640
 bpf-internal.h     |   8 +-
e4e640
 bpf-translate.cxx  | 259 ++++++++++++++++++++++++++++++++++++++++-------------
e4e640
 tapset/logging.stp |   2 +
e4e640
 4 files changed, 237 insertions(+), 78 deletions(-)
e4e640
e4e640
diff --git a/bpf-base.cxx b/bpf-base.cxx
e4e640
index c3e36efa1..277927b72 100644
e4e640
--- a/bpf-base.cxx
e4e640
+++ b/bpf-base.cxx
e4e640
@@ -135,31 +135,43 @@ is_commutative(opcode code)
e4e640
     }
e4e640
 }
e4e640
 
e4e640
+/* Various functions for eBPF helper lookup: */
e4e640
+
e4e640
+std::map<unsigned, const char *> bpf_func_name_map;
e4e640
+std::map<std::string, bpf_func_id> bpf_func_id_map;
e4e640
+
e4e640
+void
e4e640
+init_bpf_helper_tables () // TODO call before script translation
e4e640
+{
e4e640
+#define __BPF_SET_FUNC_NAME(x) bpf_func_name_map[BPF_FUNC_ ## x] = #x
e4e640
+#define __BPF_SET_FUNC_ID(x) bpf_func_id_map[#x] = BPF_FUNC_ ## x
e4e640
+  __BPF_FUNC_MAPPER(__BPF_SET_FUNC_NAME)
e4e640
+  __STAPBPF_FUNC_MAPPER(__BPF_SET_FUNC_NAME)
e4e640
+  __BPF_FUNC_MAPPER(__BPF_SET_FUNC_ID)
e4e640
+  __STAPBPF_FUNC_MAPPER(__BPF_SET_FUNC_ID)
e4e640
+  (void)0;
e4e640
+}
e4e640
+
e4e640
 const char *
e4e640
 bpf_function_name (unsigned id)
e4e640
 {
e4e640
-  switch (id)
e4e640
-    {
e4e640
-    case BPF_FUNC_map_lookup_elem:	return "map_lookup_elem";
e4e640
-    case BPF_FUNC_map_update_elem:	return "map_update_elem";
e4e640
-    case BPF_FUNC_map_delete_elem:	return "map_delete_elem";
e4e640
-    case BPF_FUNC_probe_read:		return "probe_read";
e4e640
-    case BPF_FUNC_ktime_get_ns:		return "ktime_get_ns";
e4e640
-    case BPF_FUNC_trace_printk:		return "trace_printk";
e4e640
-    case BPF_FUNC_get_prandom_u32:	return "get_prandom_u32";
e4e640
-    case BPF_FUNC_get_smp_processor_id:	return "get_smp_processor_id";
e4e640
-    case BPF_FUNC_get_current_pid_tgid:	return "get_current_pid_tgid";
e4e640
-    case BPF_FUNC_get_current_uid_gid:	return "get_current_uid_gid";
e4e640
-    case BPF_FUNC_get_current_comm:	return "get_current_comm";
e4e640
-    case BPF_FUNC_perf_event_read:	return "perf_event_read";
e4e640
-    case BPF_FUNC_perf_event_output:	return "perf_event_output";
e4e640
-    default:				return NULL;
e4e640
-    }
e4e640
+  if (bpf_func_name_map.count(id) != 0)
e4e640
+    return bpf_func_name_map[id];
e4e640
+  return NULL;
e4e640
+}
e4e640
+
e4e640
+bpf_func_id
e4e640
+bpf_function_id (const std::string& name)
e4e640
+{
e4e640
+  if (bpf_func_id_map.count(name) != 0)
e4e640
+    return bpf_func_id_map[name];
e4e640
+  return __BPF_FUNC_MAX_ID;
e4e640
 }
e4e640
 
e4e640
 unsigned
e4e640
 bpf_function_nargs (unsigned id)
e4e640
 {
e4e640
+  // ??? generalize to all bpf functions
e4e640
   switch (id)
e4e640
     {
e4e640
     case BPF_FUNC_map_lookup_elem:	return 2;
e4e640
diff --git a/bpf-internal.h b/bpf-internal.h
e4e640
index 719446db8..61514db9f 100644
e4e640
--- a/bpf-internal.h
e4e640
+++ b/bpf-internal.h
e4e640
@@ -96,14 +96,20 @@ bool is_move(opcode c);
e4e640
 bool is_ldst(opcode c);
e4e640
 bool is_binary(opcode c);
e4e640
 bool is_commutative(opcode c);
e4e640
+
e4e640
+void init_bpf_helper_tables();
e4e640
 const char *bpf_function_name (unsigned id);
e4e640
+bpf_func_id bpf_function_id (const std::string &name);
e4e640
 unsigned bpf_function_nargs (unsigned id);
e4e640
 
e4e640
 const opcode BPF_LD_MAP = BPF_LD | BPF_IMM | BPF_DW | (BPF_PSEUDO_MAP_FD << 8);
e4e640
 
e4e640
-// Not actual BPF helpers, but treating them like one simplifies some of the
e4e640
+// Not actual BPF helpers, but treating them as such simplifies some of the
e4e640
 // interpreter logic. We give them IDs that shouldn't conflict with IDs of
e4e640
 // real BPF helpers.
e4e640
+#define __STAPBPF_FUNC_MAPPER(FN) \
e4e640
+  FN(map_get_next_key),           \
e4e640
+  FN(sprintf),
e4e640
 const bpf_func_id BPF_FUNC_map_get_next_key    = (bpf_func_id) -1;
e4e640
 const bpf_func_id BPF_FUNC_sprintf             = (bpf_func_id) -2;
e4e640
 
e4e640
diff --git a/bpf-translate.cxx b/bpf-translate.cxx
e4e640
index 023ac6ce7..af3f54b50 100644
e4e640
--- a/bpf-translate.cxx
e4e640
+++ b/bpf-translate.cxx
e4e640
@@ -179,7 +179,7 @@ struct bpf_unparser : public throwing_visitor
e4e640
   // TODO General triage of bpf-possible functionality:
e4e640
   virtual void visit_block (::block *s);
e4e640
   // TODO visit_try_block -> UNHANDLED
e4e640
-  virtual void visit_embeddedcode (embeddedcode *s); // TODO need to find testcase/example for this
e4e640
+  virtual void visit_embeddedcode (embeddedcode *s);
e4e640
   virtual void visit_null_statement (null_statement *s);
e4e640
   virtual void visit_expr_statement (expr_statement *s);
e4e640
   virtual void visit_if_statement (if_statement* s);
e4e640
@@ -192,7 +192,7 @@ struct bpf_unparser : public throwing_visitor
e4e640
   virtual void visit_continue_statement (continue_statement* s);
e4e640
   virtual void visit_literal_string (literal_string *e);
e4e640
   virtual void visit_literal_number (literal_number* e);
e4e640
-  // TODO visit_embedded_expr -> UNHANDLED, could be handled like embedded_code with a return value?
e4e640
+  // TODO visit_embedded_expr -> UNHANDLED, could treat as embedded_code
e4e640
   virtual void visit_binary_expression (binary_expression* e);
e4e640
   virtual void visit_unary_expression (unary_expression* e);
e4e640
   virtual void visit_pre_crement (pre_crement* e);
e4e640
@@ -200,7 +200,7 @@ struct bpf_unparser : public throwing_visitor
e4e640
   virtual void visit_logical_or_expr (logical_or_expr* e);
e4e640
   virtual void visit_logical_and_expr (logical_and_expr* e);
e4e640
   virtual void visit_array_in (array_in* e);
e4e640
-  // ??? visit_regex_query is UNHANDLED, requires adding new kernel functionality.
e4e640
+  // ??? visit_regex_query -> UNHANDLED, requires new kernel functionality
e4e640
   virtual void visit_compound_expression (compound_expression *e);
e4e640
   virtual void visit_comparison (comparison* e);
e4e640
   // TODO visit_concatenation -> (2) pseudo-LOOP: copy the strings while concatenating
e4e640
@@ -239,14 +239,21 @@ struct bpf_unparser : public throwing_visitor
e4e640
   value *emit_context_var(bpf_context_vardecl *v);
e4e640
 
e4e640
   // Used for the embedded-code assembler:
e4e640
+  int64_t parse_imm (const asm_stmt &stmt, const std::string &str);
e4e640
   size_t parse_asm_stmt (embeddedcode *s, size_t start,
e4e640
                            /*OUT*/asm_stmt &stmt);
e4e640
-  value *emit_asm_arg(const asm_stmt &stmt, const std::string &reg,
e4e640
-                      bool allow_imm = true);
e4e640
+  value *emit_asm_arg(const asm_stmt &stmt, const std::string &arg,
e4e640
+                      bool allow_imm = true, bool allow_emit = true);
e4e640
   value *emit_asm_reg(const asm_stmt &stmt, const std::string &reg);
e4e640
+  value *get_asm_reg(const asm_stmt &stmt, const std::string &reg);
e4e640
   void emit_asm_opcode(const asm_stmt &stmt,
e4e640
                        std::map<std::string, block *> label_map);
e4e640
 
e4e640
+  // Used for the embedded-code assembler's diagnostics:
e4e640
+  source_loc adjusted_loc;
e4e640
+  size_t adjust_pos;
e4e640
+  std::vector<token *> adjusted_toks; // track for deallocation
e4e640
+
e4e640
   // Used for string data:
e4e640
   value *emit_literal_string(const std::string &str, const token *tok);
e4e640
   value *emit_string_copy(value *dest, int ofs, value *src, bool zero_pad = false);
e4e640
@@ -580,17 +587,22 @@ bpf_unparser::visit_block (::block *s)
e4e640
 
e4e640
    Ahh for the days of 1960s FORTRAN.
e4e640
 
e4e640
-   TODO: It might make more sense to implement an assembler based on
e4e640
+   ??? It might make more sense to implement an assembler based on
e4e640
    the syntax used in official eBPF subsystem docs. */
e4e640
 
e4e640
-/* Possible assembly statement types include:
e4e640
+/* Supported assembly statement types include:
e4e640
 
e4e640
    <stmt> ::= label, <dest=label>;
e4e640
+   <stmt> ::= alloc, <dest=reg>, <imm=imm>;
e4e640
+   <stmt> ::= call, <dest=reg>, <param[0]=function name>, <param[1]=arg>, ...;
e4e640
+   <stmt> ::= printf, <param[0]=string constant>, <param[1]=arg>, ...;
e4e640
+   <stmt> ::= error, <param[0]=string constant>, <param[1]=arg>, ...;
e4e640
    <stmt> ::= <code=integer opcode>, <dest=reg>, <src1=reg>,
e4e640
               <off/jmp_target=off>, <imm=imm>;
e4e640
 
e4e640
-   Possible argument types include:
e4e640
+   Supported argument types include:
e4e640
 
e4e640
+   <arg> ::= <reg> | <imm>
e4e640
    <reg> ::= <register index> | r<register index> |
e4e640
              $<identifier> | $<integer constant> | $$ | <string constant>
e4e640
    <imm> ::= <integer constant> | BPF_MAXSTRINGLEN
e4e640
@@ -598,6 +610,9 @@ bpf_unparser::visit_block (::block *s)
e4e640
 
e4e640
 */
e4e640
 
e4e640
+// TODO
e4e640
+#define BPF_ASM_DEBUG
e4e640
+
e4e640
 struct asm_stmt {
e4e640
   std::string kind;
e4e640
 
e4e640
@@ -609,9 +624,10 @@ struct asm_stmt {
e4e640
   bool has_fallthrough = false;
e4e640
   std::string jmp_target, fallthrough;
e4e640
 
e4e640
+  // metadata for call, error instructions
e4e640
+  std::vector<std::string> params;
e4e640
+
e4e640
   token *tok;
e4e640
-  bool deallocate_tok = false;
e4e640
-  ~asm_stmt() { if (deallocate_tok) delete tok; }
e4e640
 };
e4e640
 
e4e640
 std::ostream&
e4e640
@@ -647,10 +663,30 @@ is_numeric (const std::string &str)
e4e640
     stol(str, &pos, 0);
e4e640
   } catch (std::invalid_argument &e) {
e4e640
     return false;
e4e640
+  } catch (std::out_of_range &e) {
e4e640
+    /* XXX: probably numeric but not valid; give up */
e4e640
+    return false;
e4e640
   }
e4e640
   return (pos == str.size());
e4e640
 }
e4e640
 
e4e640
+int64_t
e4e640
+bpf_unparser::parse_imm (const asm_stmt &stmt, const std::string &str)
e4e640
+{
e4e640
+  int64_t val;
e4e640
+  if (str == "BPF_MAXSTRINGLEN")
e4e640
+    val = BPF_MAXSTRINGLEN;
e4e640
+  else if (str == "-")
e4e640
+    val = 0;
e4e640
+  else try {
e4e640
+      val = stol(str);
e4e640
+    } catch (std::exception &e) { // XXX: invalid_argument, out_of_range
e4e640
+      throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode operand '%s'",
e4e640
+                               str.c_str()), stmt.tok);
e4e640
+    }
e4e640
+  return val;
e4e640
+}
e4e640
+
e4e640
 /* Parse an assembly statement starting from position start in code,
e4e640
    then write the output in stmt. Returns a position immediately after
e4e640
    the parsed statement. */
e4e640
@@ -663,31 +699,14 @@ bpf_unparser::parse_asm_stmt (embeddedcode *s, size_t start,
e4e640
  retry:
e4e640
   std::vector<std::string> args;
e4e640
   unsigned n = code.size();
e4e640
+  size_t pos;
e4e640
   bool in_comment = false;
e4e640
   bool in_string = false;
e4e640
 
e4e640
-  // compute token with adjusted source location for diagnostics
e4e640
-  source_loc adjusted_loc; // TODO: ought to create a proper copy constructor for source_loc
e4e640
-  adjusted_loc.file = s->tok->location.file;
e4e640
-  adjusted_loc.line = s->tok->location.line;
e4e640
-  adjusted_loc.column = s->tok->location.column;
e4e640
-  for (size_t pos = 0; pos < start && pos < n; pos++)
e4e640
-    {
e4e640
-      // TODO: should save adjusted_loc state between parse_asm_stmt invocations; add field?
e4e640
-      char c = code[pos];
e4e640
-      if (c == '\n')
e4e640
-        {
e4e640
-          adjusted_loc.line++;
e4e640
-          adjusted_loc.column = 1;
e4e640
-        }
e4e640
-      else
e4e640
-        adjusted_loc.column++;
e4e640
-    }
e4e640
-
e4e640
-  // TODO: As before, parser is extremely non-rigorous and could do
e4e640
+  // ??? As before, parser is extremely non-rigorous and could do
e4e640
   // with some tightening in terms of the inputs it accepts.
e4e640
-  size_t pos;
e4e640
   std::string arg = "";
e4e640
+  size_t save_start = start; // -- position for diagnostics
e4e640
   for (pos = start; pos < n; pos++)
e4e640
   {
e4e640
     char c = code[pos];
e4e640
@@ -714,6 +733,9 @@ bpf_unparser::parse_asm_stmt (embeddedcode *s, size_t start,
e4e640
       ++pos, in_comment = true;
e4e640
     else if (c == '"') // found a literal string
e4e640
       {
e4e640
+        if (arg.empty() && args.empty())
e4e640
+          save_start = pos; // start of first argument
e4e640
+
e4e640
         // XXX: This allows '"' inside an arg and will treat the
e4e640
         // string as a sequence of weird identifier characters.  A
e4e640
         // more rigorous parser would error on mixing strings and
e4e640
@@ -738,6 +760,9 @@ bpf_unparser::parse_asm_stmt (embeddedcode *s, size_t start,
e4e640
       }
e4e640
     else // found (we assume) a regular char
e4e640
       {
e4e640
+        if (arg.empty() && args.empty())
e4e640
+          save_start = pos; // start of first argument
e4e640
+
e4e640
         // XXX: As before, this strips whitespace within args
e4e640
         // (so '$ab', '$ a b' and '$a b' are equivalent).
e4e640
         //
e4e640
@@ -760,28 +785,73 @@ bpf_unparser::parse_asm_stmt (embeddedcode *s, size_t start,
e4e640
       goto retry;
e4e640
     }
e4e640
 
e4e640
+  // compute token with adjusted source location for diagnostics
e4e640
+  // TODO: needs some attention to how multiline tokens are printed in error reporting -- with this code, caret aligns incorrectly
e4e640
+  for (/* use saved adjust_pos */; adjust_pos < save_start && adjust_pos < n; adjust_pos++)
e4e640
+    {
e4e640
+      char c = code[adjust_pos];
e4e640
+      if (c == '\n')
e4e640
+        {
e4e640
+          adjusted_loc.line++;
e4e640
+          adjusted_loc.column = 1;
e4e640
+        }
e4e640
+      else
e4e640
+        adjusted_loc.column++;
e4e640
+    }
e4e640
+
e4e640
   // set token with adjusted source location
e4e640
-  //stmt.tok = (token *)s->tok;
e4e640
-  // TODO this segfaults for some reason, some data not copied?
e4e640
   stmt.tok = s->tok->adjust_location(adjusted_loc);
e4e640
-  stmt.deallocate_tok = false; // TODO must avoid destroy-on-copy
e4e640
+  adjusted_toks.push_back(stmt.tok);
e4e640
 
e4e640
-  std::cerr << "DEBUG GOT stmt "; // TODO
e4e640
-  for (unsigned k = 0; k < args.size(); k++) std::cerr << args[k] << " / ";
e4e640
-  std::cerr << std::endl; // TODO
e4e640
+#ifdef BPF_ASM_DEBUG
e4e640
+  std::cerr << "bpf_asm parse_asm_stmt: tokenizer got ";
e4e640
+  for (unsigned k = 0; k < args.size(); k++)
e4e640
+    std::cerr << args[k] << ", ";
e4e640
+  std::cerr << std::endl;
e4e640
+#endif
e4e640
   if (args[0] == "label")
e4e640
     {
e4e640
       if (args.size() != 2)
e4e640
-        throw SEMANTIC_ERROR (_("invalid bpf embeddedcode syntax"), stmt.tok);
e4e640
+        throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (label expects 1 arg, found %lu)", args.size()-1), stmt.tok);
e4e640
+      stmt.kind = args[0];
e4e640
+      stmt.dest = args[1];
e4e640
+    }
e4e640
+  else if (args[0] == "alloc")
e4e640
+    {
e4e640
+      if (args.size() != 3)
e4e640
+        throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (alloc expects 2 args, found %lu)", args.size()-1), stmt.tok);
e4e640
       stmt.kind = args[0];
e4e640
       stmt.dest = args[1];
e4e640
+      stmt.imm = parse_imm(stmt, args[2]);
e4e640
+    }
e4e640
+  else if (args[0] == "call")
e4e640
+    {
e4e640
+      if (args.size() < 3)
e4e640
+        throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (call expects at least 2 args, found %lu)", args.size()-1), stmt.tok);
e4e640
+      stmt.kind = args[0];
e4e640
+      stmt.dest = args[1];
e4e640
+      for (unsigned k = 2; k < args.size(); k++)
e4e640
+        stmt.params.push_back(args[k]);
e4e640
+    }
e4e640
+  else if (args[0] == "printf" || args[0] == "error")
e4e640
+    {
e4e640
+      if (args.size() < 2)
e4e640
+        throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (%s expects at least 2 args, found %lu)", args[0].c_str(), args.size()-1), stmt.tok);
e4e640
+      stmt.kind = args[0];
e4e640
+      for (unsigned k = 2; k < args.size(); k++)
e4e640
+        stmt.params.push_back(args[k]);
e4e640
     }
e4e640
   else if (is_numeric(args[0]))
e4e640
     {
e4e640
-      if (args.size() != 5) // TODO change to 4 to test err+tok
e4e640
-        throw SEMANTIC_ERROR (_("invalid bpf embeddedcode syntax"), stmt.tok);
e4e640
+      if (args.size() != 5)
e4e640
+        throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (opcode expects 4 args, found %lu)", args.size()-1), stmt.tok);
e4e640
       stmt.kind = "opcode";
e4e640
-      stmt.code = stoul(args[0], 0, 0); // TODO signal error
e4e640
+      try {
e4e640
+        stmt.code = stoul(args[0], 0, 0);
e4e640
+      } catch (std::exception &e) { // XXX: invalid_argument, out_of_range
e4e640
+        throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode opcode '%s'",
e4e640
+                                 args[0].c_str()), stmt.tok);
e4e640
+      }
e4e640
       stmt.dest = args[1];
e4e640
       stmt.src1 = args[2];
e4e640
 
e4e640
@@ -799,25 +869,16 @@ bpf_unparser::parse_asm_stmt (embeddedcode *s, size_t start,
e4e640
           stmt.off = 0;
e4e640
           stmt.jmp_target = args[3];
e4e640
         }
e4e640
-      else if (args[3] == "BPF_MAXSTRINGLEN")
e4e640
-        stmt.off = BPF_MAXSTRINGLEN;
e4e640
-      else if (args[3] == "-")
e4e640
-        stmt.off = 0;
e4e640
       else
e4e640
-        stmt.off = stol(args[3]); // TODO signal error
e4e640
+        stmt.off = parse_imm(stmt, args[3]);
e4e640
 
e4e640
-      if (args[4] == "BPF_MAXSTRINGLEN")
e4e640
-        stmt.imm = BPF_MAXSTRINGLEN;
e4e640
-      else if (args[4] == "-")
e4e640
-        stmt.imm = 0;
e4e640
-      else
e4e640
-        stmt.imm = stol(args[4]); // TODO signal error
e4e640
+      stmt.imm = parse_imm(stmt, args[4]);
e4e640
     }
e4e640
   else
e4e640
     throw SEMANTIC_ERROR (_F("unknown bpf embeddedcode operator '%s'",
e4e640
                              args[0].c_str()), stmt.tok);
e4e640
 
e4e640
-  // we returned a statement, so there's more parsing to be done
e4e640
+  // we returned one statement, there may be more parsing to be done
e4e640
   return pos;
e4e640
 }
e4e640
 
e4e640
@@ -828,7 +889,7 @@ std::string translate_escapes (const interned_string &str);
e4e640
    May emit code to store a string constant on the stack. */
e4e640
 value *
e4e640
 bpf_unparser::emit_asm_arg (const asm_stmt &stmt, const std::string &arg,
e4e640
-                            bool allow_imm)
e4e640
+                            bool allow_imm, bool allow_emit)
e4e640
 {
e4e640
   if (arg == "$$")
e4e640
     {
e4e640
@@ -859,7 +920,6 @@ bpf_unparser::emit_asm_arg (const asm_stmt &stmt, const std::string &arg,
e4e640
       auto ok = this_locals->insert (v);
e4e640
       assert (ok.second);
e4e640
       return reg;
e4e640
-      // TODO write a testcase
e4e640
     }
e4e640
   else if (is_numeric(arg) && allow_imm)
e4e640
     {
e4e640
@@ -879,13 +939,17 @@ bpf_unparser::emit_asm_arg (const asm_stmt &stmt, const std::string &arg,
e4e640
     }
e4e640
   else if (arg[0] == '"')
e4e640
     {
e4e640
-      // TODO verify correctness
e4e640
+      if (!allow_emit)
e4e640
+        throw SEMANTIC_ERROR (_F("invalid bpf argument %s "
e4e640
+                                 "(string literal not allowed here)",
e4e640
+                                 arg.c_str()), stmt.tok);
e4e640
+
e4e640
       /* arg is a string constant */
e4e640
       if (arg[arg.size() - 1] != '"')
e4e640
         throw SEMANTIC_ERROR (_F("BUG: improper string %s",
e4e640
                                  arg.c_str()), stmt.tok);
e4e640
       std::string escaped_str = arg.substr(1,arg.size()-2); /* strip quotes */
e4e640
-      std::string str = translate_escapes(escaped_str); // TODO interned_str?
e4e640
+      std::string str = translate_escapes(escaped_str);
e4e640
       return emit_literal_string(str, stmt.tok);
e4e640
     }
e4e640
   else if (arg == "BPF_MAXSTRINGLEN")
e4e640
@@ -913,12 +977,22 @@ bpf_unparser::emit_asm_arg (const asm_stmt &stmt, const std::string &arg,
e4e640
   
e4e640
 }
e4e640
 
e4e640
+/* As above, but don't accept immediate values.
e4e640
+   Do accept string constants (since they're stored in a register). */
e4e640
 value *
e4e640
 bpf_unparser::emit_asm_reg (const asm_stmt &stmt, const std::string &reg)
e4e640
 {
e4e640
   return emit_asm_arg(stmt, reg, /*allow_imm=*/false);
e4e640
 }
e4e640
 
e4e640
+/* As above, but don't allow string constants or anything that emits code.
e4e640
+   Useful if the context requires an lvalue. */
e4e640
+value *
e4e640
+bpf_unparser::get_asm_reg (const asm_stmt &stmt, const std::string &reg)
e4e640
+{
e4e640
+  return emit_asm_arg(stmt, reg, /*allow_imm=*/false, /*allow_emit=*/false);
e4e640
+}
e4e640
+
e4e640
 void
e4e640
 bpf_unparser::emit_asm_opcode (const asm_stmt &stmt,
e4e640
                                std::map<std::string, block *> label_map)
e4e640
@@ -1013,7 +1087,7 @@ bpf_unparser::emit_asm_opcode (const asm_stmt &stmt,
e4e640
 
e4e640
   value *v_dest = NULL;
e4e640
   if (r_dest || r_src0)
e4e640
-    v_dest = emit_asm_reg(stmt, stmt.dest);
e4e640
+    v_dest = get_asm_reg(stmt, stmt.dest);
e4e640
   else if (stmt.dest != "0" && stmt.dest != "-")
e4e640
     throw SEMANTIC_ERROR (_F("invalid register field '%s' in bpf code",
e4e640
                              stmt.dest.c_str()), stmt.tok);
e4e640
@@ -1069,6 +1143,10 @@ bpf_unparser::visit_embeddedcode (embeddedcode *s)
e4e640
   std::vector<asm_stmt> statements;
e4e640
   asm_stmt stmt;
e4e640
 
e4e640
+  // track adjusted source location for each stmt
e4e640
+  adjusted_loc = s->tok->location;
e4e640
+  adjust_pos = 0;
e4e640
+
e4e640
   size_t pos = 0;
e4e640
   while ((pos = parse_asm_stmt(s, pos, stmt)) != std::string::npos)
e4e640
     {
e4e640
@@ -1133,7 +1211,7 @@ bpf_unparser::visit_embeddedcode (embeddedcode *s)
e4e640
           after_jump = NULL;
e4e640
         }
e4e640
     }
e4e640
-  if (after_jump != NULL) // TODO: should just fall through to exit
e4e640
+  if (after_jump != NULL) // ??? should just fall through to exit
e4e640
     throw SEMANTIC_ERROR (_("BUG: bpf embeddedcode doesn't support "
e4e640
                             "fallthrough on final asm_stmt"), stmt.tok);
e4e640
 
e4e640
@@ -1144,14 +1222,67 @@ bpf_unparser::visit_embeddedcode (embeddedcode *s)
e4e640
        it != statements.end(); it++)
e4e640
     {
e4e640
       stmt = *it;
e4e640
-      std::cerr << "DEBUG processing " << stmt << std::endl; // TODO
e4e640
+#ifdef BPF_ASM_DEBUG
e4e640
+      std::cerr << "bpf_asm visit_embeddedcode: " << stmt << std::endl;
e4e640
+#endif
e4e640
       if (stmt.kind == "label")
e4e640
         {
e4e640
-          // TODO: be sure there's no gap in the edge
e4e640
           if (!jumped_already)
e4e640
             emit_jmp (label_map[stmt.dest]);
e4e640
           set_block(label_map[stmt.dest]);
e4e640
         }
e4e640
+      else if (stmt.kind == "alloc")
e4e640
+        {
e4e640
+          /* Reserve stack space and store its address in dest. */
e4e640
+          int ofs = this_prog.max_tmp_space + stmt.imm;
e4e640
+          value *dest = get_asm_reg(stmt, stmt.dest);
e4e640
+          this_prog.use_tmp_space(-ofs);
e4e640
+          this_prog.mk_binary(this_ins, BPF_ADD, dest,
e4e640
+                              this_prog.lookup_reg(BPF_REG_10) /*frame*/,
e4e640
+                              this_prog.new_imm(ofs));
e4e640
+        }
e4e640
+      else if (stmt.kind == "call")
e4e640
+        {
e4e640
+          std::string func_name = stmt.params[0];
e4e640
+          bpf_func_id hid = bpf_function_id(func_name);
e4e640
+          if (hid != __BPF_FUNC_MAX_ID)
e4e640
+            {
e4e640
+              // TODO diagnostic: check if the number of arguments is correct
e4e640
+              regno r = BPF_REG_1; unsigned nargs = 0;
e4e640
+              for (unsigned k = 1; k < stmt.params.size(); k++)
e4e640
+                {
e4e640
+                  // ??? Could make params optional to avoid this part,
e4e640
+                  // ??? since the calling convention is well-known.
e4e640
+                  value *from_reg = emit_asm_arg(stmt, stmt.params[k]);
e4e640
+                  value *to_reg = this_prog.lookup_reg(r);
e4e640
+                  this_prog.mk_mov(this_ins, to_reg, from_reg);
e4e640
+                  nargs++; r++;
e4e640
+                }
e4e640
+              this_prog.mk_call(this_ins, hid, nargs);
e4e640
+              this_prog.mk_mov(this_ins, get_asm_reg(stmt, stmt.dest),
e4e640
+                               this_prog.lookup_reg(BPF_REG_0) /* returnval */);
e4e640
+              // ??? Could make stmt.dest optional to avoid this extra mov,
e4e640
+              // ??? since the BPF_REG_0 convention is well-known.
e4e640
+            }
e4e640
+          else
e4e640
+            {
e4e640
+              // TODO function_name = params[0];
e4e640
+              // TODO args = parse_reg(params[1]), parse_reg(params[2]), ...
e4e640
+              // TODO emit_functioncall() with good bits from visit_functioncall()
e4e640
+              throw SEMANTIC_ERROR (_("BUG: bpf embeddedcode non-helper 'call' not yet supported"),
e4e640
+                                    stmt.tok);
e4e640
+            }
e4e640
+        }
e4e640
+      else if (stmt.kind == "printf" || stmt.kind == "error")
e4e640
+        {
e4e640
+          // TODO Note that error() should be modeled on the tapset function in tapset/logging.stp
e4e640
+          // TODO format = params[0];
e4e640
+          // TODO args = parse_reg(params[1]), parse_reg(params[2]), ...
e4e640
+          // TODO emit_print_format() with good bits from visit_print_format()
e4e640
+          // TODO if (stmt.kind == "error") emit functioncall to exit() 
e4e640
+          throw SEMANTIC_ERROR (_("BUG: bpf embeddedcode 'printf/error' not yet supported"),
e4e640
+                                stmt.tok);
e4e640
+        }
e4e640
       else if (stmt.kind == "opcode")
e4e640
         {
e4e640
           emit_asm_opcode (stmt, label_map);
e4e640
@@ -1164,6 +1295,12 @@ bpf_unparser::visit_embeddedcode (embeddedcode *s)
e4e640
       if (stmt.has_fallthrough)
e4e640
         set_block(label_map[stmt.fallthrough]);
e4e640
     }
e4e640
+
e4e640
+  // housekeeping -- deallocate adjusted_toks along with statements
e4e640
+  for (std::vector<token *>::iterator it = adjusted_toks.begin();
e4e640
+       it != adjusted_toks.end(); it++)
e4e640
+    delete *it;
e4e640
+  adjusted_toks.clear();
e4e640
 }
e4e640
 
e4e640
 void
e4e640
@@ -3260,6 +3397,8 @@ translate_bpf_pass (systemtap_session& s)
e4e640
 {
e4e640
   using namespace bpf;
e4e640
 
e4e640
+  init_bpf_helper_tables();
e4e640
+
e4e640
   if (elf_version(EV_CURRENT) == EV_NONE)
e4e640
     return 1;
e4e640
 
e4e640
diff --git a/tapset/logging.stp b/tapset/logging.stp
e4e640
index 59edce3c8..839239b8f 100644
e4e640
--- a/tapset/logging.stp
e4e640
+++ b/tapset/logging.stp
e4e640
@@ -128,6 +128,8 @@ function error (msg:string)
e4e640
      exit() // TODO: should support MAXERRORS, probe error {}
e4e640
    }
e4e640
 %)
e4e640
+// NOTE: The 'error' statement in the eBPF assembler in bpf-translate.cxx
e4e640
+// should be kept up-to-date with the behaviour of this function.
e4e640
 
e4e640
 /**
e4e640
  * sfunction assert - evaluate assertion
e4e640
-- 
e4e640
2.14.5
e4e640