Blame SOURCES/0008-Port-to-the-ARM-64-bits-AArch64-architecture-experim.patch

1280ab
From 9f75e98d1cad55d1f6e0131e656acc716177e8d5 Mon Sep 17 00:00:00 2001
1280ab
From: Xavier Leroy <xavier.leroy@inria.fr>
1280ab
Date: Thu, 18 Jul 2013 16:09:20 +0000
66bad7
Subject: [PATCH 08/19] Port to the ARM 64-bits (AArch64) architecture
1280ab
 (experimental). Merge of branch branches/arm64.
1280ab
1280ab
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@13909 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
1280ab
(cherry picked from commit 055d5c0379e42b4f561cb1fc5159659d8e9a7b6f)
1280ab
---
1280ab
 asmcomp/arm64/arch.ml            | 146 ++++++++
1280ab
 asmcomp/arm64/emit.mlp           | 742 +++++++++++++++++++++++++++++++++++++++
1280ab
 asmcomp/arm64/proc.ml            | 212 +++++++++++
1280ab
 asmcomp/arm64/reload.ml          |  16 +
1280ab
 asmcomp/arm64/scheduling.ml      |  18 +
1280ab
 asmcomp/arm64/selection.ml       | 265 ++++++++++++++
1280ab
 asmcomp/compilenv.ml             |   9 +
1280ab
 asmcomp/compilenv.mli            |   4 +
1280ab
 asmrun/arm64.S                   | 535 ++++++++++++++++++++++++++++
1280ab
 asmrun/signals_osdep.h           |  19 +
1280ab
 asmrun/stack.h                   |   5 +
1280ab
 byterun/interp.c                 |   6 +
1280ab
 configure                        |   5 +-
1280ab
 otherlibs/num/bng.c              |   6 +-
1280ab
 otherlibs/num/bng_arm64.c        |  20 ++
1280ab
 testsuite/tests/asmcomp/Makefile |   2 +-
1280ab
 testsuite/tests/asmcomp/arm64.S  |  52 +++
1280ab
 testsuite/tests/asmcomp/main.ml  |   1 +
1280ab
 18 files changed, 2057 insertions(+), 6 deletions(-)
1280ab
 create mode 100644 asmcomp/arm64/arch.ml
1280ab
 create mode 100644 asmcomp/arm64/emit.mlp
1280ab
 create mode 100644 asmcomp/arm64/proc.ml
1280ab
 create mode 100644 asmcomp/arm64/reload.ml
1280ab
 create mode 100644 asmcomp/arm64/scheduling.ml
1280ab
 create mode 100644 asmcomp/arm64/selection.ml
1280ab
 create mode 100644 asmrun/arm64.S
1280ab
 create mode 100644 otherlibs/num/bng_arm64.c
1280ab
 create mode 100644 testsuite/tests/asmcomp/arm64.S
1280ab
1280ab
diff --git a/asmcomp/arm64/arch.ml b/asmcomp/arm64/arch.ml
1280ab
new file mode 100644
1280ab
index 0000000..a53251f
1280ab
--- /dev/null
1280ab
+++ b/asmcomp/arm64/arch.ml
1280ab
@@ -0,0 +1,146 @@
1280ab
+(***********************************************************************)
1280ab
+(*                                                                     *)
1280ab
+(*                                OCaml                                *)
1280ab
+(*                                                                     *)
1280ab
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
1280ab
+(*                  Benedikt Meurer, University of Siegen              *)
1280ab
+(*                                                                     *)
1280ab
+(*    Copyright 2013 Institut National de Recherche en Informatique    *)
1280ab
+(*    et en Automatique. Copyright 2012 Benedikt Meurer. All rights    *)
1280ab
+(*    reserved.  This file is distributed  under the terms of the Q    *)
1280ab
+(*    Public License version 1.0.                                      *)
1280ab
+(*                                                                     *)
1280ab
+(***********************************************************************)
1280ab
+
1280ab
+let command_line_options = []
1280ab
+
1280ab
+(* Specific operations for the ARM processor, 64-bit mode *)
1280ab
+
1280ab
+open Format
1280ab
+
1280ab
+let command_line_options = []
1280ab
+
1280ab
+(* Addressing modes *)
1280ab
+
1280ab
+type addressing_mode =
1280ab
+  | Iindexed of int                     (* reg + displ *)
1280ab
+  | Ibased of string * int              (* global var + displ *)
1280ab
+
1280ab
+(* We do not support the reg + shifted reg addressing mode, because
1280ab
+   what we really need is reg + shifted reg + displ,
1280ab
+   and this is decomposed in two instructions (reg + shifted reg -> tmp,
1280ab
+   then addressing tmp + displ). *)
1280ab
+
1280ab
+(* Specific operations *)
1280ab
+
1280ab
+type specific_operation =
1280ab
+  | Ishiftarith of arith_operation * int
1280ab
+  | Ishiftcheckbound of int
1280ab
+  | Imuladd       (* multiply and add *)
1280ab
+  | Imulsub       (* multiply and subtract *)
1280ab
+  | Inegmulf      (* floating-point negate and multiply *)
1280ab
+  | Imuladdf      (* floating-point multiply and add *)
1280ab
+  | Inegmuladdf   (* floating-point negate, multiply and add *)
1280ab
+  | Imulsubf      (* floating-point multiply and subtract *)
1280ab
+  | Inegmulsubf   (* floating-point negate, multiply and subtract *)
1280ab
+  | Isqrtf        (* floating-point square root *)
1280ab
+  | Ibswap of int (* endianess conversion *)
1280ab
+
1280ab
+and arith_operation =
1280ab
+    Ishiftadd
1280ab
+  | Ishiftsub
1280ab
+
1280ab
+(* Sizes, endianness *)
1280ab
+
1280ab
+let big_endian = false
1280ab
+
1280ab
+let size_addr = 8
1280ab
+let size_int = 8
1280ab
+let size_float = 8
1280ab
+
1280ab
+let allow_unaligned_access = false
1280ab
+
1280ab
+(* Behavior of division *)
1280ab
+
1280ab
+let division_crashes_on_overflow = false
1280ab
+
1280ab
+(* Operations on addressing modes *)
1280ab
+
1280ab
+let identity_addressing = Iindexed 0
1280ab
+
1280ab
+let offset_addressing addr delta =
1280ab
+  match addr with
1280ab
+  | Iindexed n -> Iindexed(n + delta)
1280ab
+  | Ibased(s, n) -> Ibased(s, n + delta)
1280ab
+
1280ab
+let num_args_addressing = function
1280ab
+  | Iindexed n -> 1
1280ab
+  | Ibased(s, n) -> 0
1280ab
+
1280ab
+(* Printing operations and addressing modes *)
1280ab
+
1280ab
+let print_addressing printreg addr ppf arg =
1280ab
+  match addr with
1280ab
+  | Iindexed n ->
1280ab
+      printreg ppf arg.(0);
1280ab
+      if n <> 0 then fprintf ppf " + %i" n
1280ab
+  | Ibased(s, 0) ->
1280ab
+      fprintf ppf "\"%s\"" s
1280ab
+  | Ibased(s, n) ->
1280ab
+      fprintf ppf "\"%s\" + %i" s n
1280ab
+
1280ab
+let print_specific_operation printreg op ppf arg =
1280ab
+  match op with
1280ab
+  | Ishiftarith(op, shift) ->
1280ab
+      let op_name = function
1280ab
+      | Ishiftadd -> "+"
1280ab
+      | Ishiftsub -> "-" in
1280ab
+      let shift_mark =
1280ab
+       if shift >= 0
1280ab
+       then sprintf "<< %i" shift
1280ab
+       else sprintf ">> %i" (-shift) in
1280ab
+      fprintf ppf "%a %s %a %s"
1280ab
+       printreg arg.(0) (op_name op) printreg arg.(1) shift_mark
1280ab
+  | Ishiftcheckbound n ->
1280ab
+      fprintf ppf "check %a >> %i > %a" printreg arg.(0) n printreg arg.(1)
1280ab
+  | Imuladd ->
1280ab
+      fprintf ppf "(%a * %a) + %a"
1280ab
+        printreg arg.(0)
1280ab
+        printreg arg.(1)
1280ab
+        printreg arg.(2)
1280ab
+  | Imulsub ->
1280ab
+      fprintf ppf "-(%a * %a) + %a"
1280ab
+        printreg arg.(0)
1280ab
+        printreg arg.(1)
1280ab
+        printreg arg.(2)
1280ab
+  | Inegmulf ->
1280ab
+      fprintf ppf "-f (%a *f %a)"
1280ab
+        printreg arg.(0)
1280ab
+        printreg arg.(1)
1280ab
+  | Imuladdf ->
1280ab
+      fprintf ppf "%a +f (%a *f %a)"
1280ab
+        printreg arg.(0)
1280ab
+        printreg arg.(1)
1280ab
+        printreg arg.(2)
1280ab
+  | Inegmuladdf ->
1280ab
+      fprintf ppf "(-f %a) -f (%a *f %a)"
1280ab
+        printreg arg.(0)
1280ab
+        printreg arg.(1)
1280ab
+        printreg arg.(2)
1280ab
+  | Imulsubf ->
1280ab
+      fprintf ppf "%a -f (%a *f %a)"
1280ab
+        printreg arg.(0)
1280ab
+        printreg arg.(1)
1280ab
+        printreg arg.(2)
1280ab
+  | Inegmulsubf ->
1280ab
+      fprintf ppf "(-f %a) +f (%a *f %a)"
1280ab
+        printreg arg.(0)
1280ab
+        printreg arg.(1)
1280ab
+        printreg arg.(2)
1280ab
+  | Isqrtf ->
1280ab
+      fprintf ppf "sqrtf %a"
1280ab
+        printreg arg.(0)
1280ab
+  | Ibswap n ->
1280ab
+      fprintf ppf "bswap%i %a" n
1280ab
+        printreg arg.(0)
1280ab
+
1280ab
diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp
1280ab
new file mode 100644
1280ab
index 0000000..fc9649c
1280ab
--- /dev/null
1280ab
+++ b/asmcomp/arm64/emit.mlp
1280ab
@@ -0,0 +1,742 @@
1280ab
+(***********************************************************************)
1280ab
+(*                                                                     *)
1280ab
+(*                                OCaml                                *)
1280ab
+(*                                                                     *)
1280ab
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
1280ab
+(*                  Benedikt Meurer, University of Siegen              *)
1280ab
+(*                                                                     *)
1280ab
+(*    Copyright 2013 Institut National de Recherche en Informatique    *)
1280ab
+(*    et en Automatique. Copyright 2012 Benedikt Meurer. All rights    *)
1280ab
+(*    reserved.  This file is distributed  under the terms of the Q    *)
1280ab
+(*    Public License version 1.0.                                      *)
1280ab
+(*                                                                     *)
1280ab
+(***********************************************************************)
1280ab
+
1280ab
+(* Emission of ARM assembly code, 64-bit mode *)
1280ab
+
1280ab
+open Misc
1280ab
+open Cmm
1280ab
+open Arch
1280ab
+open Proc
1280ab
+open Reg
1280ab
+open Mach
1280ab
+open Linearize
1280ab
+open Emitaux
1280ab
+
1280ab
+(* Tradeoff between code size and code speed *)
1280ab
+
1280ab
+let fastcode_flag = ref true
1280ab
+
1280ab
+(* Names for special regs *)
1280ab
+
1280ab
+let reg_trap_ptr = phys_reg 23
1280ab
+let reg_alloc_ptr = phys_reg 24
1280ab
+let reg_alloc_limit = phys_reg 25
1280ab
+let reg_tmp1 = phys_reg 26
1280ab
+let reg_tmp2 = phys_reg 27
1280ab
+let reg_x15 = phys_reg 15
1280ab
+
1280ab
+(* Output a label *)
1280ab
+
1280ab
+let emit_label lbl =
1280ab
+  emit_string ".L"; emit_int lbl
1280ab
+
1280ab
+let emit_data_label lbl =
1280ab
+  emit_string ".Ld"; emit_int lbl
1280ab
+
1280ab
+(* Symbols *)
1280ab
+
1280ab
+let emit_symbol s =
1280ab
+  Emitaux.emit_symbol '$' s
1280ab
+
1280ab
+(* Output a pseudo-register *)
1280ab
+
1280ab
+let emit_reg = function
1280ab
+    {loc = Reg r} -> emit_string (register_name r)
1280ab
+  | _ -> fatal_error "Emit.emit_reg"
1280ab
+
1280ab
+(* Likewise, but with the 32-bit name of the register *)
1280ab
+
1280ab
+let int_reg_name_w =
1280ab
+  [| "w0";  "w1";  "w2";  "w3";  "w4";  "w5";  "w6";  "w7";
1280ab
+     "w8";  "w9";  "w10"; "w11"; "w12"; "w13"; "w14"; "w15";
1280ab
+     "w19"; "w20"; "w21"; "w22"; "w23"; "w24"; "w25";
1280ab
+     "w26"; "w27"; "w28"; "w16"; "w17" |]
1280ab
+
1280ab
+let emit_wreg = function
1280ab
+    {loc = Reg r} -> emit_string int_reg_name_w.(r)
1280ab
+  | _ -> fatal_error "Emit.emit_wreg"
1280ab
+
1280ab
+(* Layout of the stack frame *)
1280ab
+
1280ab
+let stack_offset = ref 0
1280ab
+
1280ab
+let frame_size () =
1280ab
+  let sz =
1280ab
+    !stack_offset +
1280ab
+    8 * num_stack_slots.(0) +
1280ab
+    8 * num_stack_slots.(1) +
1280ab
+    (if !contains_calls then 8 else 0)
1280ab
+  in Misc.align sz 16
1280ab
+
1280ab
+let slot_offset loc cl =
1280ab
+  match loc with
1280ab
+    Incoming n ->
1280ab
+      assert (n >= 0);
1280ab
+      frame_size() + n
1280ab
+  | Local n ->
1280ab
+      !stack_offset +
1280ab
+      (if cl = 0
1280ab
+       then n * 8
1280ab
+       else num_stack_slots.(0) * 8 + n * 8)
1280ab
+  | Outgoing n ->
1280ab
+      assert (n >= 0);
1280ab
+      n
1280ab
+
1280ab
+(* Output a stack reference *)
1280ab
+
1280ab
+let emit_stack r =
1280ab
+  match r.loc with
1280ab
+  | Stack s ->
1280ab
+      let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
1280ab
+  | _ -> fatal_error "Emit.emit_stack"
1280ab
+
1280ab
+(* Output an addressing mode *)
1280ab
+
1280ab
+let emit_symbol_offset s ofs =
1280ab
+  emit_symbol s; 
1280ab
+  if ofs > 0 then `+{emit_int ofs}`
1280ab
+  else if ofs < 0 then `-{emit_int (-ofs)}`
1280ab
+  else ()
1280ab
+
1280ab
+let emit_addressing addr r =
1280ab
+  match addr with
1280ab
+  | Iindexed ofs ->
1280ab
+      `[{emit_reg r}, #{emit_int ofs}]`
1280ab
+  | Ibased(s, ofs) ->
1280ab
+      `[{emit_reg r}, #:lo12:{emit_symbol_offset s ofs}]`
1280ab
+
1280ab
+(* Record live pointers at call points *)
1280ab
+
1280ab
+let record_frame_label live dbg =
1280ab
+  let lbl = new_label() in
1280ab
+  let live_offset = ref [] in
1280ab
+  Reg.Set.iter
1280ab
+    (function
1280ab
+        {typ = Addr; loc = Reg r} ->
1280ab
+          live_offset := ((r lsl 1) + 1) :: !live_offset
1280ab
+      | {typ = Addr; loc = Stack s} as reg ->
1280ab
+          live_offset := slot_offset s (register_class reg) :: !live_offset
1280ab
+      | _ -> ())
1280ab
+    live;
1280ab
+  frame_descriptors :=
1280ab
+    { fd_lbl = lbl;
1280ab
+      fd_frame_size = frame_size();
1280ab
+      fd_live_offset = !live_offset;
1280ab
+      fd_debuginfo = dbg } :: !frame_descriptors;
1280ab
+  lbl
1280ab
+
1280ab
+let record_frame live dbg =
1280ab
+  let lbl = record_frame_label live dbg in `{emit_label lbl}:`
1280ab
+
1280ab
+(* Record calls to the GC -- we've moved them out of the way *)
1280ab
+
1280ab
+type gc_call =
1280ab
+  { gc_lbl: label;                      (* Entry label *)
1280ab
+    gc_return_lbl: label;               (* Where to branch after GC *)
1280ab
+    gc_frame_lbl: label }               (* Label of frame descriptor *)
1280ab
+
1280ab
+let call_gc_sites = ref ([] : gc_call list)
1280ab
+
1280ab
+let emit_call_gc gc =
1280ab
+  `{emit_label gc.gc_lbl}:	bl	{emit_symbol "caml_call_gc"}\n`;
1280ab
+  `{emit_label gc.gc_frame_lbl}:	b	{emit_label gc.gc_return_lbl}\n`
1280ab
+
1280ab
+(* Record calls to caml_ml_array_bound_error.
1280ab
+   In debug mode, we maintain one call to caml_ml_array_bound_error
1280ab
+   per bound check site. Otherwise, we can share a single call. *)
1280ab
+
1280ab
+type bound_error_call =
1280ab
+  { bd_lbl: label;                    (* Entry label *)
1280ab
+    bd_frame_lbl: label }             (* Label of frame descriptor *)
1280ab
+
1280ab
+let bound_error_sites = ref ([] : bound_error_call list)
1280ab
+
1280ab
+let bound_error_label dbg =
1280ab
+  if !Clflags.debug || !bound_error_sites = [] then begin
1280ab
+    let lbl_bound_error = new_label() in
1280ab
+    let lbl_frame = record_frame_label Reg.Set.empty dbg in
1280ab
+    bound_error_sites :=
1280ab
+      { bd_lbl = lbl_bound_error;
1280ab
+        bd_frame_lbl = lbl_frame } :: !bound_error_sites;
1280ab
+    lbl_bound_error
1280ab
+  end else begin
1280ab
+    let bd = List.hd !bound_error_sites in bd.bd_lbl
1280ab
+  end
1280ab
+
1280ab
+let emit_call_bound_error bd =
1280ab
+  `{emit_label bd.bd_lbl}:	bl	{emit_symbol "caml_ml_array_bound_error"}\n`;
1280ab
+  `{emit_label bd.bd_frame_lbl}:\n`
1280ab
+
1280ab
+(* Names of various instructions *)
1280ab
+
1280ab
+let name_for_comparison = function
1280ab
+  | Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le"
1280ab
+  | Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt"
1280ab
+  | Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
1280ab
+  | Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
1280ab
+
1280ab
+let name_for_int_operation = function
1280ab
+  | Iadd -> "add"
1280ab
+  | Isub -> "sub"
1280ab
+  | Imul -> "mul"
1280ab
+  | Idiv -> "sdiv"
1280ab
+  | Iand -> "and"
1280ab
+  | Ior  -> "orr"
1280ab
+  | Ixor -> "eor"
1280ab
+  | Ilsl -> "lsl"
1280ab
+  | Ilsr -> "lsr"
1280ab
+  | Iasr -> "asr"
1280ab
+  | _ -> assert false
1280ab
+
1280ab
+(* Load an integer constant into a register *)
1280ab
+
1280ab
+let emit_intconst dst n =
1280ab
+  let rec emit_pos first shift =
1280ab
+    if shift < 0 then begin
1280ab
+      if first then `	mov	{emit_reg dst}, xzr\n`
1280ab
+    end else begin
1280ab
+      let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
1280ab
+      if s = 0n then emit_pos first (shift - 16) else begin
1280ab
+        if first then
1280ab
+          `	movz	{emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`
1280ab
+        else
1280ab
+           `	movk	{emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
1280ab
+        emit_pos false (shift - 16)
1280ab
+      end
1280ab
+    end
1280ab
+  and emit_neg first shift =
1280ab
+    if shift < 0 then begin
1280ab
+      if first then `	movn	{emit_reg dst}, #0\n`
1280ab
+    end else begin
1280ab
+      let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
1280ab
+      if s = 0xFFFFn then emit_neg first (shift - 16) else begin
1280ab
+        if first then
1280ab
+          `	movn	{emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n`
1280ab
+        else
1280ab
+           `	movk	{emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
1280ab
+        emit_neg false (shift - 16)
1280ab
+      end
1280ab
+    end
1280ab
+  in
1280ab
+    if n < 0n then emit_neg true 48 else emit_pos true 48
1280ab
+
1280ab
+(* Recognize float constants appropriate for FMOV dst, #fpimm instruction:
1280ab
+   "a normalized binary floating point encoding with 1 sign bit, 4
1280ab
+    bits of fraction and a 3-bit exponent" *)
1280ab
+
1280ab
+let is_immediate_float bits =
1280ab
+  let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
1280ab
+  let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
1280ab
+  exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
1280ab
+
1280ab
+(* Adjust sp (up or down) by the given byte amount *)
1280ab
+
1280ab
+let emit_stack_adjustment n =
1280ab
+  let instr = if n < 0 then "sub" else "add" in
1280ab
+  let m = abs n in
1280ab
+  assert (m < 0x1_000_000);
1280ab
+  let ml = m land 0xFFF and mh = m land 0xFFF_000 in
1280ab
+  if mh <> 0 then `	{emit_string instr}	sp, sp, #{emit_int mh}\n`;
1280ab
+  if ml <> 0 then `	{emit_string instr}	sp, sp, #{emit_int ml}\n`;
1280ab
+  if n <> 0 then cfi_adjust_cfa_offset (-n)
1280ab
+
1280ab
+(* Deallocate the stack frame and reload the return address
1280ab
+   before a return or tail call *)
1280ab
+
1280ab
+let output_epilogue f =
1280ab
+  let n = frame_size() in
1280ab
+  if !contains_calls then
1280ab
+    `	ldr	x30, [sp, #{emit_int (n-8)}]\n`;
1280ab
+  if n > 0 then
1280ab
+    emit_stack_adjustment n;
1280ab
+  f();
1280ab
+  (* reset CFA back because function body may continue *)
1280ab
+  if n > 0 then cfi_adjust_cfa_offset n
1280ab
+
1280ab
+(* Name of current function *)
1280ab
+let function_name = ref ""
1280ab
+(* Entry point for tail recursive calls *)
1280ab
+let tailrec_entry_point = ref 0
1280ab
+(* Pending floating-point literals *)
1280ab
+let float_literals = ref ([] : (int64 * label) list)
1280ab
+
1280ab
+(* Label a floating-point literal *)
1280ab
+let float_literal f =
1280ab
+  try
1280ab
+    List.assoc f !float_literals
1280ab
+  with Not_found ->
1280ab
+    let lbl = new_label() in
1280ab
+    float_literals := (f, lbl) :: !float_literals;
1280ab
+    lbl
1280ab
+
1280ab
+(* Emit all pending literals *)
1280ab
+let emit_literals() =
1280ab
+  if !float_literals <> [] then begin
1280ab
+    `	.align	3\n`;
1280ab
+    List.iter
1280ab
+      (fun (f, lbl) ->
1280ab
+        `{emit_label lbl}:	.quad	`; emit_printf "0x%Lx\n" f)
1280ab
+      !float_literals;
1280ab
+    float_literals := []
1280ab
+  end
1280ab
+
1280ab
+(* Emit code to load the address of a symbol *)
1280ab
+
1280ab
+let emit_load_symbol_addr dst s =
1280ab
+  if (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit s then begin
1280ab
+    `	adrp	{emit_reg dst}, {emit_symbol s}\n`;
1280ab
+    `	add	{emit_reg dst}, {emit_reg dst}, #:lo12:{emit_symbol s}\n`
1280ab
+  end else begin
1280ab
+    `	adrp	{emit_reg dst}, :got:{emit_symbol s}\n`;
1280ab
+    `	ldr	{emit_reg dst}, [{emit_reg dst}, #:got_lo12:{emit_symbol s}]\n`
1280ab
+  end
1280ab
+
1280ab
+(* Output the assembly code for an instruction *)
1280ab
+
1280ab
+let emit_instr i =
1280ab
+    emit_debug_info i.dbg;
1280ab
+    match i.desc with
1280ab
+    | Lend -> ()
1280ab
+    | Lop(Imove | Ispill | Ireload) ->
1280ab
+        let src = i.arg.(0) and dst = i.res.(0) in
1280ab
+        if src.loc <> dst.loc then begin
1280ab
+          match (src, dst) with
1280ab
+          | {loc = Reg _; typ = Float}, {loc = Reg _} ->
1280ab
+              `	fmov	{emit_reg dst}, {emit_reg src}\n`
1280ab
+          | {loc = Reg _}, {loc = Reg _} ->
1280ab
+              `	mov	{emit_reg dst}, {emit_reg src}\n`
1280ab
+          | {loc = Reg _}, {loc = Stack _} ->
1280ab
+              `	str	{emit_reg src}, {emit_stack dst}\n`
1280ab
+          | {loc = Stack _}, {loc = Reg _} ->
1280ab
+              `	ldr	{emit_reg dst}, {emit_stack src}\n`
1280ab
+          | _ ->
1280ab
+              assert false
1280ab
+        end
1280ab
+    | Lop(Iconst_int n) ->
1280ab
+        emit_intconst i.res.(0) n
1280ab
+    | Lop(Iconst_float f) ->
1280ab
+        let b = Int64.bits_of_float(float_of_string f) in
1280ab
+        if b = 0L then
1280ab
+          `	fmov	{emit_reg i.res.(0)}, xzr	/* {emit_string f} */\n`
1280ab
+        else if is_immediate_float b then
1280ab
+          `	fmov	{emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b}	/* {emit_string f} */\n`
1280ab
+        else begin
1280ab
+          let lbl = float_literal b in
1280ab
+          `	adrp	{emit_reg reg_tmp1}, {emit_label lbl}\n`;
1280ab
+          `	ldr	{emit_reg i.res.(0)}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}]	/* {emit_string f} */\n`
1280ab
+        end
1280ab
+    | Lop(Iconst_symbol s) ->
1280ab
+        emit_load_symbol_addr i.res.(0) s
1280ab
+    | Lop(Icall_ind) ->
1280ab
+        `	blr	{emit_reg i.arg.(0)}\n`;
1280ab
+        `{record_frame i.live i.dbg}\n`
1280ab
+    | Lop(Icall_imm s) ->
1280ab
+        `	bl	{emit_symbol s}\n`;
1280ab
+        `{record_frame i.live i.dbg}\n`
1280ab
+    | Lop(Itailcall_ind) ->
1280ab
+        output_epilogue (fun () -> `	br	{emit_reg i.arg.(0)}\n`)
1280ab
+    | Lop(Itailcall_imm s) ->
1280ab
+        if s = !function_name then
1280ab
+          `	b	{emit_label !tailrec_entry_point}\n`
1280ab
+        else
1280ab
+          output_epilogue (fun () -> `	b	{emit_symbol s}\n`)
1280ab
+    | Lop(Iextcall(s, false)) ->
1280ab
+        `	bl	{emit_symbol s}\n`
1280ab
+    | Lop(Iextcall(s, true)) ->
1280ab
+        emit_load_symbol_addr reg_x15 s;
1280ab
+        `	bl	{emit_symbol "caml_c_call"}\n`;
1280ab
+        `{record_frame i.live i.dbg}\n`
1280ab
+    | Lop(Istackoffset n) ->
1280ab
+        assert (n mod 16 = 0);
1280ab
+        emit_stack_adjustment (-n);
1280ab
+        stack_offset := !stack_offset + n
1280ab
+    | Lop(Iload(size, addr)) ->
1280ab
+        let dst = i.res.(0) in
1280ab
+        let base =
1280ab
+          match addr with
1280ab
+          | Iindexed ofs -> i.arg.(0)
1280ab
+          | Ibased(s, ofs) ->
1280ab
+              `	adrp	{emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
1280ab
+              reg_tmp1 in
1280ab
+        begin match size with
1280ab
+        | Byte_unsigned ->
1280ab
+            `	ldrb	{emit_wreg dst}, {emit_addressing addr base}\n`
1280ab
+        | Byte_signed ->
1280ab
+            `	ldrsb	{emit_reg dst}, {emit_addressing addr base}\n`
1280ab
+        | Sixteen_unsigned ->
1280ab
+            `	ldrh	{emit_wreg dst}, {emit_addressing addr base}\n`
1280ab
+        | Sixteen_signed ->
1280ab
+            `	ldrsh	{emit_reg dst}, {emit_addressing addr base}\n`
1280ab
+        | Thirtytwo_unsigned ->
1280ab
+            `	ldr	{emit_wreg dst}, {emit_addressing addr base}\n`
1280ab
+        | Thirtytwo_signed ->
1280ab
+            `	ldrsw	{emit_reg dst}, {emit_addressing addr base}\n`
1280ab
+        | Single ->
1280ab
+            `	ldr	s7, {emit_addressing addr base}\n`;
1280ab
+            `	fcvt	{emit_reg dst}, s7\n`
1280ab
+        | Word | Double | Double_u ->
1280ab
+            `	ldr	{emit_reg dst}, {emit_addressing addr base}\n`
1280ab
+        end
1280ab
+    | Lop(Istore(size, addr)) ->
1280ab
+        let src = i.arg.(0) in
1280ab
+        let base =
1280ab
+          match addr with
1280ab
+          | Iindexed ofs -> i.arg.(1)
1280ab
+          | Ibased(s, ofs) ->
1280ab
+              `	adrp	{emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
1280ab
+              reg_tmp1 in
1280ab
+        begin match size with
1280ab
+        | Byte_unsigned | Byte_signed ->
1280ab
+            `	strb	{emit_wreg src}, {emit_addressing addr base}\n`
1280ab
+        | Sixteen_unsigned | Sixteen_signed ->
1280ab
+            `	strh	{emit_wreg src}, {emit_addressing addr base}\n`
1280ab
+        | Thirtytwo_unsigned | Thirtytwo_signed ->
1280ab
+            `	str	{emit_wreg src}, {emit_addressing addr base}\n`
1280ab
+        | Single ->
1280ab
+            `	fcvt	s7, {emit_reg src}\n`;
1280ab
+            `	str	s7, {emit_addressing addr base}\n`;
1280ab
+        | Word | Double | Double_u ->
1280ab
+            `	str	{emit_reg src}, {emit_addressing addr base}\n`
1280ab
+        end
1280ab
+    | Lop(Ialloc n) ->
1280ab
+        let lbl_frame = record_frame_label i.live i.dbg in
1280ab
+        if !fastcode_flag then begin
1280ab
+          let lbl_redo = new_label() in
1280ab
+          let lbl_call_gc = new_label() in
1280ab
+          `{emit_label lbl_redo}:`;
1280ab
+          `	sub	{emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`;
1280ab
+          `	cmp	{emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`;
1280ab
+          `	add	{emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`;
1280ab
+          `	b.lo	{emit_label lbl_call_gc}\n`;
1280ab
+          call_gc_sites :=
1280ab
+            { gc_lbl = lbl_call_gc;
1280ab
+              gc_return_lbl = lbl_redo;
1280ab
+              gc_frame_lbl = lbl_frame } :: !call_gc_sites
1280ab
+        end else begin
1280ab
+          begin match n with
1280ab
+          | 16 -> `	bl	{emit_symbol "caml_alloc1"}\n`
1280ab
+          | 24 -> `	bl	{emit_symbol "caml_alloc2"}\n`
1280ab
+          | 32 -> `	bl	{emit_symbol "caml_alloc3"}\n`
1280ab
+          | _  -> emit_intconst reg_x15 (Nativeint.of_int n);
1280ab
+                  `	bl	{emit_symbol "caml_allocN"}\n`
1280ab
+          end;
1280ab
+          `{emit_label lbl_frame}:	add	{emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`
1280ab
+        end
1280ab
+    | Lop(Iintop(Icomp cmp)) ->
1280ab
+        `	cmp	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
1280ab
+        `	cset	{emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
1280ab
+    | Lop(Iintop_imm(Icomp cmp, n)) ->
1280ab
+        `	cmp	{emit_reg i.arg.(0)}, #{emit_int n}\n`;
1280ab
+        `	cset	{emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
1280ab
+    | Lop(Iintop Icheckbound) ->
1280ab
+        let lbl = bound_error_label i.dbg in
1280ab
+        `	cmp	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
1280ab
+        `	b.ls	{emit_label lbl}\n`
1280ab
+    | Lop(Iintop_imm(Icheckbound, n)) ->
1280ab
+        let lbl = bound_error_label i.dbg in
1280ab
+        `	cmp	{emit_reg i.arg.(0)}, #{emit_int n}\n`;
1280ab
+        `	b.ls	{emit_label lbl}\n`
1280ab
+    | Lop(Ispecific(Ishiftcheckbound shift)) ->
1280ab
+        let lbl = bound_error_label i.dbg in
1280ab
+        `	cmp	{emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
1280ab
+        `	b.cs	{emit_label lbl}\n`
1280ab
+    | Lop(Iintop Imod) ->
1280ab
+        `	sdiv	{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
1280ab
+        `	msub	{emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
1280ab
+    | Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
1280ab
+        let l = Misc.log2 n in
1280ab
+        `	asr	{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`;
1280ab
+        `	add	{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`;
1280ab
+        `	asr	{emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_int l}\n`
1280ab
+    | Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
1280ab
+        let l = Misc.log2 n in
1280ab
+        `	asr	{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`;
1280ab
+        `	add	{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`;
1280ab
+        `	asr	{emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_int l}\n`;
1280ab
+        `	sub	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsl {emit_int l}\n`
1280ab
+    | Lop(Iintop op) ->
1280ab
+        let instr = name_for_int_operation op in
1280ab
+        `	{emit_string instr}     {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
1280ab
+    | Lop(Iintop_imm(op, n)) ->
1280ab
+        let instr = name_for_int_operation op in
1280ab
+        `	{emit_string instr}     {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`
1280ab
+    | Lop(Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf as op) ->
1280ab
+        let instr = (match op with
1280ab
+                     | Ifloatofint      -> "scvtf"
1280ab
+                     | Iintoffloat      -> "fcvtzs"
1280ab
+                     | Iabsf            -> "fabs"
1280ab
+                     | Inegf            -> "fneg"
1280ab
+                     | Ispecific Isqrtf -> "fsqrt"
1280ab
+                     | _                -> assert false) in
1280ab
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1280ab
+    | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
1280ab
+        let instr = (match op with
1280ab
+                     | Iaddf              -> "fadd"
1280ab
+                     | Isubf              -> "fsub"
1280ab
+                     | Imulf              -> "fmul"
1280ab
+                     | Idivf              -> "fdiv"
1280ab
+                     | Ispecific Inegmulf -> "fnmul"
1280ab
+                     | _                  -> assert false) in
1280ab
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
1280ab
+    | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
1280ab
+        let instr = (match op with
1280ab
+                     | Imuladdf    -> "fmadd"
1280ab
+                     | Inegmuladdf -> "fnmadd"
1280ab
+                     | Imulsubf    -> "fmsub"
1280ab
+                     | Inegmulsubf -> "fnmsub"
1280ab
+                     | _ -> assert false) in
1280ab
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
1280ab
+    | Lop(Ispecific(Ishiftarith(op, shift))) ->
1280ab
+        let instr = (match op with
1280ab
+                       Ishiftadd    -> "add"
1280ab
+                     | Ishiftsub    -> "sub") in
1280ab
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
1280ab
+        if shift >= 0
1280ab
+        then `, lsl #{emit_int shift}\n`
1280ab
+        else `, asr #{emit_int (-shift)}\n`
1280ab
+    | Lop(Ispecific(Imuladd | Imulsub as op)) ->
1280ab
+        let instr = (match op with
1280ab
+                       Imuladd -> "madd"
1280ab
+                     | Imulsub -> "msub"
1280ab
+                     | _ -> assert false) in
1280ab
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
1280ab
+    | Lop(Ispecific(Ibswap size)) ->
1280ab
+        begin match size with
1280ab
+        | 16 ->
1280ab
+            `	rev16	{emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`;
1280ab
+            `	ubfm	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #0, #16\n`
1280ab
+        | 32 ->
1280ab
+            `	rev	{emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`
1280ab
+        | 64 ->
1280ab
+            `	rev	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1280ab
+        | _ ->
1280ab
+            assert false
1280ab
+        end
1280ab
+    | Lreloadretaddr ->
1280ab
+        ()
1280ab
+    | Lreturn ->
1280ab
+        output_epilogue (fun () -> `	ret\n`)
1280ab
+    | Llabel lbl ->
1280ab
+        `{emit_label lbl}:\n`
1280ab
+    | Lbranch lbl ->
1280ab
+        `	b	{emit_label lbl}\n`
1280ab
+    | Lcondbranch(tst, lbl) ->
1280ab
+        begin match tst with
1280ab
+        | Itruetest ->
1280ab
+            `	cbnz	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
1280ab
+        | Ifalsetest ->
1280ab
+            `	cbz	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
1280ab
+        | Iinttest cmp ->
1280ab
+            `	cmp	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
1280ab
+            let comp = name_for_comparison cmp in
1280ab
+            `	b.{emit_string comp}	{emit_label lbl}\n`
1280ab
+        | Iinttest_imm(cmp, n) ->
1280ab
+            `	cmp	{emit_reg i.arg.(0)}, #{emit_int n}\n`;
1280ab
+            let comp = name_for_comparison cmp in
1280ab
+            `	b.{emit_string comp}	{emit_label lbl}\n`
1280ab
+        | Ifloattest(cmp, neg) ->
1280ab
+            let comp = (match (cmp, neg) with
1280ab
+                        | (Ceq, false) | (Cne, true) -> "eq"
1280ab
+                        | (Cne, false) | (Ceq, true) -> "ne"
1280ab
+                        | (Clt, false) -> "cc"
1280ab
+                        | (Clt, true)  -> "cs"
1280ab
+                        | (Cle, false) -> "ls"
1280ab
+                        | (Cle, true)  -> "hi"
1280ab
+                        | (Cgt, false) -> "gt"
1280ab
+                        | (Cgt, true)  -> "le"
1280ab
+                        | (Cge, false) -> "ge"
1280ab
+                        | (Cge, true)  -> "lt") in
1280ab
+            `	fcmp	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
1280ab
+            `	b.{emit_string comp}	{emit_label lbl}\n`
1280ab
+        | Ioddtest ->
1280ab
+            `	tbnz	{emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
1280ab
+        | Ieventest ->
1280ab
+            `	tbz	{emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
1280ab
+        end
1280ab
+    | Lcondbranch3(lbl0, lbl1, lbl2) ->
1280ab
+        `	cmp	{emit_reg i.arg.(0)}, #1\n`;
1280ab
+        begin match lbl0 with
1280ab
+          None -> ()
1280ab
+        | Some lbl -> `	b.lt	{emit_label lbl}\n`
1280ab
+        end;
1280ab
+        begin match lbl1 with
1280ab
+          None -> ()
1280ab
+        | Some lbl -> `	b.eq	{emit_label lbl}\n`
1280ab
+        end;
1280ab
+        begin match lbl2 with
1280ab
+          None -> ()
1280ab
+        | Some lbl -> `	b.gt	{emit_label lbl}\n`
1280ab
+        end
1280ab
+    | Lswitch jumptbl ->
1280ab
+        let lbltbl = new_label() in
1280ab
+        `	adr	{emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
1280ab
+        `	add	{emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2\n`;
1280ab
+        `	br	{emit_reg reg_tmp1}\n`;
1280ab
+        `{emit_label lbltbl}:`;
1280ab
+        for j = 0 to Array.length jumptbl - 1 do
1280ab
+            `	b	{emit_label jumptbl.(j)}\n`
1280ab
+        done
1280ab
+(* Alternative: 
1280ab
+        let lbltbl = new_label() in
1280ab
+        `	adr	{emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
1280ab
+        `	ldr	{emit_wreg reg_tmp2}, [{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2]\n`;
1280ab
+        `	add	{emit_reg reg_tmp1}, {emit_wreg reg_tmp2}, sxtb\n`;
1280ab
+        `	br	{emit_reg reg_tmp1}\n`;
1280ab
+        `{emit_label lbltbl}:\n`;
1280ab
+        for j = 0 to Array.length jumptbl - 1 do
1280ab
+            `	.word	{emit_label jumptbl.(j)} - {emit_label lbltbl}\n`
1280ab
+        done
1280ab
+*)
1280ab
+    | Lsetuptrap lbl ->
1280ab
+        let lblnext = new_label() in
1280ab
+        `	adr	{emit_reg reg_tmp1}, {emit_label lblnext}\n`;
1280ab
+        `	b	{emit_label lbl}\n`;
1280ab
+        `{emit_label lblnext}:\n`
1280ab
+    | Lpushtrap ->
1280ab
+        stack_offset := !stack_offset + 16;
1280ab
+        `	str	{emit_reg reg_trap_ptr}, [sp, -16]!\n`;
1280ab
+        `	str	{emit_reg reg_tmp1}, [sp, #8]\n`;
1280ab
+        cfi_adjust_cfa_offset 16;
1280ab
+        `	mov	{emit_reg reg_trap_ptr}, sp\n`
1280ab
+    | Lpoptrap ->
1280ab
+        `	ldr	{emit_reg reg_trap_ptr}, [sp], 16\n`;
1280ab
+        cfi_adjust_cfa_offset (-16);
1280ab
+        stack_offset := !stack_offset - 16
1280ab
+    | Lraise ->
1280ab
+        if !Clflags.debug then begin
1280ab
+          `	bl	{emit_symbol "caml_raise_exn"}\n`;
1280ab
+          `{record_frame Reg.Set.empty i.dbg}\n`
1280ab
+        end else begin
1280ab
+          `	mov	sp, {emit_reg reg_trap_ptr}\n`;
1280ab
+          `	ldr	{emit_reg reg_tmp1}, [sp, #8]\n`;
1280ab
+          `	ldr	{emit_reg reg_trap_ptr}, [sp], 16\n`;
1280ab
+          `	br	{emit_reg reg_tmp1}\n`
1280ab
+        end
1280ab
+
1280ab
+(* Emission of an instruction sequence *)
1280ab
+
1280ab
+let rec emit_all i =
1280ab
+  if i.desc = Lend then () else (emit_instr i; emit_all i.next)
1280ab
+
1280ab
+(* Emission of the profiling prelude *)
1280ab
+
1280ab
+let emit_profile() = ()   (* TODO *)
1280ab
+(*
1280ab
+  match Config.system with
1280ab
+    "linux_eabi" | "linux_eabihf" ->
1280ab
+      `	push	\{lr}\n`;
1280ab
+      `	{emit_call "__gnu_mcount_nc"}\n`
1280ab
+  | _ -> ()
1280ab
+*)
1280ab
+
1280ab
+(* Emission of a function declaration *)
1280ab
+
1280ab
+let fundecl fundecl =
1280ab
+  function_name := fundecl.fun_name;
1280ab
+  fastcode_flag := fundecl.fun_fast;
1280ab
+  tailrec_entry_point := new_label();
1280ab
+  float_literals := [];
1280ab
+  stack_offset := 0;
1280ab
+  call_gc_sites := [];
1280ab
+  bound_error_sites := [];
1280ab
+  `	.text\n`;
1280ab
+  `	.align	2\n`;
1280ab
+  `	.globl	{emit_symbol fundecl.fun_name}\n`;
1280ab
+  `	.type	{emit_symbol fundecl.fun_name}, %function\n`;
1280ab
+  `{emit_symbol fundecl.fun_name}:\n`;
1280ab
+  emit_debug_info fundecl.fun_dbg;
1280ab
+  cfi_startproc();
1280ab
+  if !Clflags.gprofile then emit_profile();
1280ab
+  let n = frame_size() in
1280ab
+  if n > 0 then 
1280ab
+    emit_stack_adjustment (-n);
1280ab
+  if !contains_calls then
1280ab
+    `	str	x30, [sp, #{emit_int (n-8)}]\n`;
1280ab
+  `{emit_label !tailrec_entry_point}:\n`;
1280ab
+  emit_all fundecl.fun_body;
1280ab
+  List.iter emit_call_gc !call_gc_sites;
1280ab
+  List.iter emit_call_bound_error !bound_error_sites;
1280ab
+  cfi_endproc();
1280ab
+  `	.type	{emit_symbol fundecl.fun_name}, %function\n`;
1280ab
+  `	.size	{emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`;
1280ab
+  emit_literals()
1280ab
+
1280ab
+(* Emission of data *)
1280ab
+
1280ab
+let emit_item = function
1280ab
+  | Cglobal_symbol s -> `	.globl	{emit_symbol s}\n`;
1280ab
+  | Cdefine_symbol s -> `{emit_symbol s}:\n`
1280ab
+  | Cdefine_label lbl -> `{emit_data_label lbl}:\n`
1280ab
+  | Cint8 n -> `	.byte	{emit_int n}\n`
1280ab
+  | Cint16 n -> `	.short	{emit_int n}\n`
1280ab
+  | Cint32 n -> `	.long	{emit_nativeint n}\n`
1280ab
+  | Cint n -> `	.quad	{emit_nativeint n}\n`
1280ab
+  | Csingle f -> emit_float32_directive ".long" f
1280ab
+  | Cdouble f -> emit_float64_directive ".quad" f
1280ab
+  | Csymbol_address s -> `	.quad	{emit_symbol s}\n`
1280ab
+  | Clabel_address lbl -> `	.quad	{emit_data_label lbl}\n`
1280ab
+  | Cstring s -> emit_string_directive "	.ascii  " s
1280ab
+  | Cskip n -> if n > 0 then `	.space	{emit_int n}\n`
1280ab
+  | Calign n -> `	.align	{emit_int(Misc.log2 n)}\n`
1280ab
+
1280ab
+let data l =
1280ab
+  `	.data\n`;
1280ab
+  List.iter emit_item l
1280ab
+
1280ab
+(* Beginning / end of an assembly file *)
1280ab
+
1280ab
+let begin_assembly() =
1280ab
+  reset_debug_info();
1280ab
+  let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
1280ab
+  `	.data\n`;
1280ab
+  `	.globl	{emit_symbol lbl_begin}\n`;
1280ab
+  `{emit_symbol lbl_begin}:\n`;
1280ab
+  let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
1280ab
+  `	.text\n`;
1280ab
+  `	.globl	{emit_symbol lbl_begin}\n`;
1280ab
+  `{emit_symbol lbl_begin}:\n`
1280ab
+
1280ab
+let end_assembly () =
1280ab
+  let lbl_end = Compilenv.make_symbol (Some "code_end") in
1280ab
+  `	.text\n`;
1280ab
+  `	.globl	{emit_symbol lbl_end}\n`;
1280ab
+  `{emit_symbol lbl_end}:\n`;
1280ab
+  let lbl_end = Compilenv.make_symbol (Some "data_end") in
1280ab
+  `	.data\n`;
1280ab
+  `	.globl	{emit_symbol lbl_end}\n`;
1280ab
+  `{emit_symbol lbl_end}:\n`;
1280ab
+  `	.long	0\n`;
1280ab
+  let lbl = Compilenv.make_symbol (Some "frametable") in
1280ab
+  `	.globl	{emit_symbol lbl}\n`;
1280ab
+  `{emit_symbol lbl}:\n`;
1280ab
+  emit_frames
1280ab
+    { efa_label = (fun lbl ->
1280ab
+                       `	.type	{emit_label lbl}, %function\n`;
1280ab
+                       `	.quad	{emit_label lbl}\n`);
1280ab
+      efa_16 = (fun n -> `	.short	{emit_int n}\n`);
1280ab
+      efa_32 = (fun n -> `	.long	{emit_int32 n}\n`);
1280ab
+      efa_word = (fun n -> `	.quad	{emit_int n}\n`);
1280ab
+      efa_align = (fun n -> `	.align	{emit_int(Misc.log2 n)}\n`);
1280ab
+      efa_label_rel = (fun lbl ofs ->
1280ab
+                           `	.long	{emit_label lbl} - . + {emit_int32 ofs}\n`);
1280ab
+      efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
1280ab
+      efa_string = (fun s -> emit_string_directive "	.asciz	" s) };
1280ab
+  `	.type	{emit_symbol lbl}, %object\n`;
1280ab
+  `	.size	{emit_symbol lbl}, .-{emit_symbol lbl}\n`;
1280ab
+  begin match Config.system with
1280ab
+  | "linux" ->
1280ab
+      (* Mark stack as non-executable *)
1280ab
+      `	.section	.note.GNU-stack,\"\",%progbits\n`
1280ab
+  | _ -> ()
1280ab
+  end
1280ab
diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml
1280ab
new file mode 100644
1280ab
index 0000000..b52c2fd
1280ab
--- /dev/null
1280ab
+++ b/asmcomp/arm64/proc.ml
1280ab
@@ -0,0 +1,212 @@
1280ab
+(***********************************************************************)
1280ab
+(*                                                                     *)
1280ab
+(*                                OCaml                                *)
1280ab
+(*                                                                     *)
1280ab
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
1280ab
+(*                  Benedikt Meurer, University of Siegen              *)
1280ab
+(*                                                                     *)
1280ab
+(*    Copyright 2013 Institut National de Recherche en Informatique    *)
1280ab
+(*    et en Automatique. Copyright 2012 Benedikt Meurer. All rights    *)
1280ab
+(*    reserved.  This file is distributed  under the terms of the Q    *)
1280ab
+(*    Public License version 1.0.                                      *)
1280ab
+(*                                                                     *)
1280ab
+(***********************************************************************)
1280ab
+
1280ab
+(* Description of the ARM processor in 64-bit mode *)
1280ab
+
1280ab
+open Misc
1280ab
+open Cmm
1280ab
+open Reg
1280ab
+open Arch
1280ab
+open Mach
1280ab
+
1280ab
+(* Instruction selection *)
1280ab
+
1280ab
+let word_addressed = false
1280ab
+
1280ab
+(* Registers available for register allocation *)
1280ab
+
1280ab
+(* Integer register map:
1280ab
+    x0 - x15              general purpose (caller-save)
1280ab
+    x16, x17              temporaries (used by call veeners)
1280ab
+    x18                   platform register (reserved)
1280ab
+    x19 - x25             general purpose (callee-save)
1280ab
+    x26                   trap pointer
1280ab
+    x27                   alloc pointer
1280ab
+    x28                   alloc limit
1280ab
+    x29                   frame pointer
1280ab
+    x30                   return address
1280ab
+    sp / xzr              stack pointer / zero register
1280ab
+   Floating-point register map:
1280ab
+    d0 - d7               general purpose (caller-save)
1280ab
+    d8 - d15              general purpose (callee-save)
1280ab
+    d16 - d31             generat purpose (caller-save)
1280ab
+*)
1280ab
+
1280ab
+let int_reg_name =
1280ab
+  [| "x0";  "x1";  "x2";  "x3";  "x4";  "x5";  "x6";  "x7";
1280ab
+     "x8";  "x9";  "x10"; "x11"; "x12"; "x13"; "x14"; "x15";
1280ab
+     "x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25";
1280ab
+     "x26"; "x27"; "x28"; "x16"; "x17" |]
1280ab
+
1280ab
+let float_reg_name =
1280ab
+  [| "d0";  "d1";  "d2";  "d3";  "d4";  "d5";  "d6";  "d7";
1280ab
+     "d8";  "d9";  "d10"; "d11"; "d12"; "d13"; "d14"; "d15";
1280ab
+     "d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
1280ab
+     "d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
1280ab
+
1280ab
+let num_register_classes = 2
1280ab
+
1280ab
+let register_class r =
1280ab
+  match r.typ with
1280ab
+  | (Int | Addr)  -> 0
1280ab
+  | Float         -> 1
1280ab
+
1280ab
+let num_available_registers =
1280ab
+  [| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *)
1280ab
+
1280ab
+let first_available_register =
1280ab
+  [| 0; 100 |]
1280ab
+
1280ab
+let register_name r =
1280ab
+  if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
1280ab
+
1280ab
+let rotate_registers = true
1280ab
+
1280ab
+(* Representation of hard registers by pseudo-registers *)
1280ab
+
1280ab
+let hard_int_reg =
1280ab
+  let v = Array.create 28 Reg.dummy in
1280ab
+  for i = 0 to 27 do
1280ab
+    v.(i) <- Reg.at_location Int (Reg i)
1280ab
+  done;
1280ab
+  v
1280ab
+
1280ab
+let hard_float_reg =
1280ab
+  let v = Array.create 32 Reg.dummy in
1280ab
+  for i = 0 to 31 do
1280ab
+    v.(i) <- Reg.at_location Float (Reg(100 + i))
1280ab
+  done;
1280ab
+  v
1280ab
+
1280ab
+let all_phys_regs =
1280ab
+  Array.append hard_int_reg hard_float_reg
1280ab
+
1280ab
+let phys_reg n =
1280ab
+  if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
1280ab
+
1280ab
+let reg_x15 = phys_reg 15
1280ab
+let reg_d7 = phys_reg 107
1280ab
+
1280ab
+let stack_slot slot ty =
1280ab
+  Reg.at_location ty (Stack slot)
1280ab
+
1280ab
+(* Calling conventions *)
1280ab
+
1280ab
+let calling_conventions
1280ab
+    first_int last_int first_float last_float make_stack arg =
1280ab
+  let loc = Array.create (Array.length arg) Reg.dummy in
1280ab
+  let int = ref first_int in
1280ab
+  let float = ref first_float in
1280ab
+  let ofs = ref 0 in
1280ab
+  for i = 0 to Array.length arg - 1 do
1280ab
+    match arg.(i).typ with
1280ab
+      Int | Addr as ty ->
1280ab
+        if !int <= last_int then begin
1280ab
+          loc.(i) <- phys_reg !int;
1280ab
+          incr int
1280ab
+        end else begin
1280ab
+          loc.(i) <- stack_slot (make_stack !ofs) ty;
1280ab
+          ofs := !ofs + size_int
1280ab
+        end
1280ab
+    | Float ->
1280ab
+        if !float <= last_float then begin
1280ab
+          loc.(i) <- phys_reg !float;
1280ab
+          incr float
1280ab
+        end else begin
1280ab
+          loc.(i) <- stack_slot (make_stack !ofs) Float;
1280ab
+          ofs := !ofs + size_float
1280ab
+        end
1280ab
+  done;
1280ab
+  (loc, Misc.align !ofs 16)  (* keep stack 16-aligned *)
1280ab
+
1280ab
+let incoming ofs = Incoming ofs
1280ab
+let outgoing ofs = Outgoing ofs
1280ab
+let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
1280ab
+
1280ab
+(* OCaml calling convention:
1280ab
+     first integer args in r0...r15
1280ab
+     first float args in d0...d15
1280ab
+     remaining args on stack.
1280ab
+   Return values in r0...r15 or d0...d15. *)
1280ab
+
1280ab
+let loc_arguments arg =
1280ab
+  calling_conventions 0 15 100 115 outgoing arg
1280ab
+let loc_parameters arg =
1280ab
+  let (loc, _) = calling_conventions 0 15 100 115 incoming arg in loc
1280ab
+let loc_results res =
1280ab
+  let (loc, _) = calling_conventions 0 15 100 115 not_supported res in loc
1280ab
+
1280ab
+(* C calling convention:
1280ab
+     first integer args in r0...r7
1280ab
+     first float args in d0...d7
1280ab
+     remaining args on stack.
1280ab
+   Return values in r0...r1 or d0. *)
1280ab
+
1280ab
+let loc_external_arguments arg =
1280ab
+  calling_conventions 0 7 100 107 outgoing arg
1280ab
+let loc_external_results res =
1280ab
+  let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
1280ab
+
1280ab
+let loc_exn_bucket = phys_reg 0
1280ab
+
1280ab
+(* Registers destroyed by operations *)
1280ab
+
1280ab
+let destroyed_at_c_call =
1280ab
+  (* x19-x28, d8-d15 preserved *)
1280ab
+  Array.of_list (List.map phys_reg
1280ab
+    [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;
1280ab
+     100;101;102;103;104;105;106;107;
1280ab
+     116;117;118;119;120;121;122;123;
1280ab
+     124;125;126;127;128;129;130;131])
1280ab
+
1280ab
+let destroyed_at_oper = function
1280ab
+  | Iop(Icall_ind | Icall_imm _) | Iop(Iextcall(_, true)) ->
1280ab
+      all_phys_regs
1280ab
+  | Iop(Iextcall(_, false)) ->
1280ab
+      destroyed_at_c_call
1280ab
+  | Iop(Ialloc _) ->
1280ab
+      [| reg_x15 |]
1280ab
+  | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
1280ab
+      [| reg_d7 |]            (* d7 / s7 destroyed *)
1280ab
+  | _ -> [||]
1280ab
+
1280ab
+let destroyed_at_raise = all_phys_regs
1280ab
+
1280ab
+(* Maximal register pressure *)
1280ab
+
1280ab
+let safe_register_pressure = function
1280ab
+  | Iextcall(_, _) -> 8
1280ab
+  | Ialloc _ -> 25
1280ab
+  | _ -> 26
1280ab
+
1280ab
+let max_register_pressure = function
1280ab
+  | Iextcall(_, _) -> [| 10; 8 |]
1280ab
+  | Ialloc _ -> [| 25; 32 |]
1280ab
+  | Iintoffloat | Ifloatofint
1280ab
+  | Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |]
1280ab
+  | _ -> [| 26; 32 |]
1280ab
+
1280ab
+(* Layout of the stack *)
1280ab
+
1280ab
+let num_stack_slots = [| 0; 0 |]
1280ab
+let contains_calls = ref false
1280ab
+
1280ab
+(* Calling the assembler *)
1280ab
+
1280ab
+let assemble_file infile outfile =
1280ab
+  Ccomp.command (Config.asm ^ " -o " ^
1280ab
+                 Filename.quote outfile ^ " " ^ Filename.quote infile)
1280ab
+
1280ab
+
1280ab
+let init () = ()
1280ab
diff --git a/asmcomp/arm64/reload.ml b/asmcomp/arm64/reload.ml
1280ab
new file mode 100644
1280ab
index 0000000..ff9214e
1280ab
--- /dev/null
1280ab
+++ b/asmcomp/arm64/reload.ml
1280ab
@@ -0,0 +1,16 @@
1280ab
+(***********************************************************************)
1280ab
+(*                                                                     *)
1280ab
+(*                                OCaml                                *)
1280ab
+(*                                                                     *)
1280ab
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
1280ab
+(*                                                                     *)
1280ab
+(*  Copyright 2013 Institut National de Recherche en Informatique et   *)
1280ab
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
1280ab
+(*  under the terms of the Q Public License version 1.0.               *)
1280ab
+(*                                                                     *)
1280ab
+(***********************************************************************)
1280ab
+
1280ab
+(* Reloading for the ARM 64 bits *)
1280ab
+
1280ab
+let fundecl f =
1280ab
+  (new Reloadgen.reload_generic)#fundecl f
1280ab
diff --git a/asmcomp/arm64/scheduling.ml b/asmcomp/arm64/scheduling.ml
1280ab
new file mode 100644
1280ab
index 0000000..cc244be
1280ab
--- /dev/null
1280ab
+++ b/asmcomp/arm64/scheduling.ml
1280ab
@@ -0,0 +1,18 @@
1280ab
+(***********************************************************************)
1280ab
+(*                                                                     *)
1280ab
+(*                                OCaml                                *)
1280ab
+(*                                                                     *)
1280ab
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
1280ab
+(*                                                                     *)
1280ab
+(*  Copyright 2013 Institut National de Recherche en Informatique et   *)
1280ab
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
1280ab
+(*  under the terms of the Q Public License version 1.0.               *)
1280ab
+(*                                                                     *)
1280ab
+(***********************************************************************)
1280ab
+
1280ab
+let _ = let module M = Schedgen in () (* to create a dependency *)
1280ab
+
1280ab
+(* Scheduling is turned off because the processor schedules dynamically
1280ab
+   much better than what we could do. *)
1280ab
+
1280ab
+let fundecl f = f
1280ab
diff --git a/asmcomp/arm64/selection.ml b/asmcomp/arm64/selection.ml
1280ab
new file mode 100644
1280ab
index 0000000..c74b282
1280ab
--- /dev/null
1280ab
+++ b/asmcomp/arm64/selection.ml
1280ab
@@ -0,0 +1,265 @@
1280ab
+(***********************************************************************)
1280ab
+(*                                                                     *)
1280ab
+(*                                OCaml                                *)
1280ab
+(*                                                                     *)
1280ab
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
1280ab
+(*                  Benedikt Meurer, University of Siegen              *)
1280ab
+(*                                                                     *)
1280ab
+(*    Copyright 2013 Institut National de Recherche en Informatique    *)
1280ab
+(*    et en Automatique. Copyright 2012 Benedikt Meurer. All rights    *)
1280ab
+(*    reserved.  This file is distributed  under the terms of the Q    *)
1280ab
+(*    Public License version 1.0.                                      *)
1280ab
+(*                                                                     *)
1280ab
+(***********************************************************************)
1280ab
+
1280ab
+(* Instruction selection for the ARM processor *)
1280ab
+
1280ab
+open Arch
1280ab
+open Cmm
1280ab
+open Mach
1280ab
+
1280ab
+let is_offset chunk n =
1280ab
+   (n >= -256 && n <= 255)               (* 9 bits signed unscaled *)
1280ab
+|| (n >= 0 &&
1280ab
+    match chunk with     (* 12 bits unsigned, scaled by chunk size *)
1280ab
+    | Byte_unsigned | Byte_signed ->
1280ab
+        n < 0x1000
1280ab
+    | Sixteen_unsigned | Sixteen_signed ->
1280ab
+        n land 1 = 0 && n lsr 1 < 0x1000
1280ab
+    | Thirtytwo_unsigned | Thirtytwo_signed | Single ->
1280ab
+        n land 3 = 0 && n lsr 2 < 0x1000
1280ab
+    | Word | Double | Double_u ->
1280ab
+        n land 7 = 0 && n lsr 3 < 0x1000)
1280ab
+
1280ab
+(* An automaton to recognize ( 0+1+0* | 1+0+1* )
1280ab
+
1280ab
+               0          1          0
1280ab
+              / \        / \        / \
1280ab
+              \ /        \ /        \ /
1280ab
+        -0--> [1] --1--> [2] --0--> [3]
1280ab
+       /     
1280ab
+     [0]
1280ab
+       \
1280ab
+        -1--> [4] --0--> [5] --1--> [6]
1280ab
+              / \        / \        / \
1280ab
+              \ /        \ /        \ /
1280ab
+               1          0          1
1280ab
+
1280ab
+The accepting states are 2, 3, 5 and 6. *)
1280ab
+
1280ab
+let auto_table = [|   (* accepting?, next on 0, next on 1 *)
1280ab
+  (* state 0 *) (false, 1, 4);
1280ab
+  (* state 1 *) (false, 1, 2);
1280ab
+  (* state 2 *) (true,  3, 2);
1280ab
+  (* state 3 *) (true,  3, 7);
1280ab
+  (* state 4 *) (false, 5, 4);
1280ab
+  (* state 5 *) (true,  5, 6);
1280ab
+  (* state 6 *) (true,  7, 6);
1280ab
+  (* state 7 *) (false, 7, 7)   (* error state *)
1280ab
+|]
1280ab
+
1280ab
+let rec run_automata nbits state input =
1280ab
+  let (acc, next0, next1) = auto_table.(state) in
1280ab
+  if nbits <= 0
1280ab
+  then acc 
1280ab
+  else run_automata (nbits - 1)
1280ab
+                    (if input land 1 = 0 then next0 else next1)
1280ab
+                    (input asr 1)
1280ab
+
1280ab
+(* We are very conservative wrt what ARM64 supports: we don't support
1280ab
+   repetitions of a 000111000 or 1110000111 pattern, just a single
1280ab
+   pattern of this kind. *)
1280ab
+
1280ab
+let is_logical_immediate n =
1280ab
+  n <> 0 && n <> -1 && run_automata 64 0 n  
1280ab
+
1280ab
+let is_intconst = function
1280ab
+    Cconst_int _ -> true
1280ab
+  | _ -> false
1280ab
+
1280ab
+let inline_ops =
1280ab
+  [ "sqrt"; "caml_bswap16_direct"; "caml_int32_direct_bswap";
1280ab
+    "caml_int64_direct_bswap"; "caml_nativeint_direct_bswap" ]
1280ab
+
1280ab
+let use_direct_addressing symb =
1280ab
+  (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit symb
1280ab
+
1280ab
+(* Instruction selection *)
1280ab
+
1280ab
+class selector = object(self)
1280ab
+
1280ab
+inherit Selectgen.selector_generic as super
1280ab
+
1280ab
+method is_immediate n =
1280ab
+  let mn = -n in
1280ab
+  n land 0xFFF = n || n land 0xFFF_000 = n
1280ab
+  || mn land 0xFFF = mn || mn land 0xFFF_000 = mn
1280ab
+
1280ab
+method! is_simple_expr = function
1280ab
+  (* inlined floating-point ops are simple if their arguments are *)
1280ab
+  | Cop(Cextcall(fn, _, _, _), args) when List.mem fn inline_ops ->
1280ab
+      List.for_all self#is_simple_expr args
1280ab
+  | e -> super#is_simple_expr e
1280ab
+
1280ab
+method select_addressing chunk = function
1280ab
+  | Cop(Cadda, [Cconst_symbol s; Cconst_int n])
1280ab
+    when use_direct_addressing s ->
1280ab
+      (Ibased(s, n), Ctuple [])
1280ab
+  | Cop(Cadda, [arg; Cconst_int n])
1280ab
+    when is_offset chunk n ->
1280ab
+      (Iindexed n, arg)
1280ab
+  | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])])
1280ab
+    when is_offset chunk n ->
1280ab
+      (Iindexed n, Cop(Cadda, [arg1; arg2]))
1280ab
+  | Cconst_symbol s
1280ab
+    when use_direct_addressing s ->
1280ab
+      (Ibased(s, 0), Ctuple [])
1280ab
+  | arg ->
1280ab
+      (Iindexed 0, arg)
1280ab
+
1280ab
+method! select_operation op args =
1280ab
+  match op with
1280ab
+  (* Integer addition *)
1280ab
+  | Caddi | Cadda ->
1280ab
+      begin match args with
1280ab
+      (* Add immediate *)
1280ab
+      | [arg; Cconst_int n] | [Cconst_int n; arg] when self#is_immediate n ->
1280ab
+          ((if n >= 0 then Iintop_imm(Iadd, n) else Iintop_imm(Isub, -n)),
1280ab
+           [arg])
1280ab
+      (* Shift-add *)
1280ab
+      | [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
1280ab
+          (Ispecific(Ishiftarith(Ishiftadd, n)), [arg1; arg2])
1280ab
+      | [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
1280ab
+          (Ispecific(Ishiftarith(Ishiftadd, -n)), [arg1; arg2])
1280ab
+      | [Cop(Clsl, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
1280ab
+          (Ispecific(Ishiftarith(Ishiftadd, n)), [arg2; arg1])
1280ab
+      | [Cop(Casr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
1280ab
+          (Ispecific(Ishiftarith(Ishiftadd, -n)), [arg2; arg1])
1280ab
+      (* Multiply-add *)
1280ab
+      | [arg1; Cop(Cmuli, args2)] | [Cop(Cmuli, args2); arg1] ->
1280ab
+          begin match self#select_operation Cmuli args2 with
1280ab
+          | (Iintop_imm(Ilsl, l), [arg3]) ->
1280ab
+              (Ispecific(Ishiftarith(Ishiftadd, l)), [arg1; arg3])
1280ab
+          | (Iintop Imul, [arg3; arg4]) ->
1280ab
+              (Ispecific Imuladd, [arg3; arg4; arg1])
1280ab
+          | _ ->
1280ab
+              super#select_operation op args
1280ab
+          end
1280ab
+      | _ ->
1280ab
+          super#select_operation op args
1280ab
+      end
1280ab
+  (* Integer subtraction *)
1280ab
+  | Csubi | Csuba ->
1280ab
+      begin match args with
1280ab
+      (* Sub immediate *)
1280ab
+      | [arg; Cconst_int n] when self#is_immediate n ->
1280ab
+          ((if n >= 0 then Iintop_imm(Isub, n) else Iintop_imm(Iadd, -n)),
1280ab
+           [arg])
1280ab
+      (* Shift-sub *)
1280ab
+      | [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
1280ab
+          (Ispecific(Ishiftarith(Ishiftsub, n)), [arg1; arg2])
1280ab
+      | [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
1280ab
+          (Ispecific(Ishiftarith(Ishiftsub, -n)), [arg1; arg2])
1280ab
+      (* Multiply-sub *)
1280ab
+      | [arg1; Cop(Cmuli, args2)] ->
1280ab
+          begin match self#select_operation Cmuli args2 with
1280ab
+          | (Iintop_imm(Ilsl, l), [arg3]) ->
1280ab
+              (Ispecific(Ishiftarith(Ishiftsub, l)), [arg1; arg3])
1280ab
+          | (Iintop Imul, [arg3; arg4]) ->
1280ab
+              (Ispecific Imulsub, [arg3; arg4; arg1])
1280ab
+          | _ ->
1280ab
+              super#select_operation op args
1280ab
+          end
1280ab
+      | _ ->
1280ab
+          super#select_operation op args
1280ab
+      end
1280ab
+  (* Checkbounds *)
1280ab
+  | Ccheckbound _ ->
1280ab
+      begin match args with
1280ab
+      | [Cop(Clsr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
1280ab
+          (Ispecific(Ishiftcheckbound n), [arg1; arg2])
1280ab
+      | _ ->
1280ab
+          super#select_operation op args
1280ab
+      end
1280ab
+  (* Integer multiplication *)
1280ab
+  (* ARM does not support immediate operands for multiplication *)
1280ab
+  | Cmuli ->
1280ab
+      begin match args with
1280ab
+      | [arg; Cconst_int n] | [Cconst_int n; arg] ->
1280ab
+          let l = Misc.log2 n in
1280ab
+          if n = 1 lsl l
1280ab
+          then (Iintop_imm(Ilsl, l), [arg])
1280ab
+          else (Iintop Imul, args)
1280ab
+      | _ ->
1280ab
+          (Iintop Imul, args)
1280ab
+      end
1280ab
+  (* Division and modulus *)
1280ab
+  (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *)
1280ab
+  | Cdivi ->
1280ab
+      begin match args with
1280ab
+      | [arg; Cconst_int n] when n = 1 lsl Misc.log2 n ->
1280ab
+          ((if n = 1 then Imove else Iintop_imm(Idiv, n)), [arg])
1280ab
+      | _ ->
1280ab
+          (Iintop Idiv, args)
1280ab
+      end
1280ab
+  | Cmodi ->
1280ab
+      begin match args with
1280ab
+      | [arg; Cconst_int n] when n = 1 lsl Misc.log2 n ->
1280ab
+          ((if n = 1 then Iconst_int 0n else Iintop_imm(Imod, n)), [arg])
1280ab
+      | _ ->
1280ab
+          (Iintop Imod, args)
1280ab
+      end
1280ab
+  (* Bitwise logical operations have a different range of immediate
1280ab
+     operands than the other instructions *)
1280ab
+  | Cand -> self#select_logical Iand args
1280ab
+  | Cor -> self#select_logical Ior args
1280ab
+  | Cxor -> self#select_logical Ixor args
1280ab
+  (* Recognize floating-point negate and multiply *)
1280ab
+  | Cnegf ->
1280ab
+      begin match args with
1280ab
+      | [Cop(Cmulf, args)] -> (Ispecific Inegmulf, args)
1280ab
+      | _ -> super#select_operation op args
1280ab
+      end
1280ab
+  (* Recognize floating-point multiply and add/sub *)
1280ab
+  | Caddf ->
1280ab
+      begin match args with
1280ab
+      | [arg; Cop(Cmulf, args)] | [Cop(Cmulf, args); arg] ->
1280ab
+          (Ispecific Imuladdf, arg :: args)
1280ab
+      | _ ->
1280ab
+          super#select_operation op args
1280ab
+      end
1280ab
+  | Csubf ->
1280ab
+      begin match args with
1280ab
+      | [arg; Cop(Cmulf, args)] ->
1280ab
+          (Ispecific Imulsubf, arg :: args)
1280ab
+      | [Cop(Cmulf, args); arg] ->
1280ab
+          (Ispecific Inegmulsubf, arg :: args)
1280ab
+      | _ ->
1280ab
+          super#select_operation op args
1280ab
+      end
1280ab
+  (* Recognize floating-point square root *)
1280ab
+  | Cextcall("sqrt", _, _, _) ->
1280ab
+      (Ispecific Isqrtf, args)
1280ab
+  (* Recognize bswap instructions *)
1280ab
+  | Cextcall("caml_bswap16_direct", _, _, _) ->
1280ab
+      (Ispecific(Ibswap 16), args)
1280ab
+  | Cextcall("caml_int32_direct_bswap", _, _, _) ->
1280ab
+      (Ispecific(Ibswap 32), args)
1280ab
+  | Cextcall(("caml_int64_direct_bswap"|"caml_nativeint_direct_bswap"),
1280ab
+              _, _, _) ->
1280ab
+      (Ispecific (Ibswap 64), args)
1280ab
+  (* Other operations are regular *)
1280ab
+  | _ ->
1280ab
+      super#select_operation op args
1280ab
+
1280ab
+method select_logical op = function
1280ab
+  | [arg; Cconst_int n] when is_logical_immediate n ->
1280ab
+      (Iintop_imm(op, n), [arg])
1280ab
+  | [Cconst_int n; arg] when is_logical_immediate n ->
1280ab
+      (Iintop_imm(op, n), [arg])
1280ab
+  | args ->
1280ab
+      (Iintop op, args)
1280ab
+
1280ab
+end
1280ab
+
1280ab
+let fundecl f = (new selector)#emit_fundecl f
1280ab
diff --git a/asmcomp/compilenv.ml b/asmcomp/compilenv.ml
1280ab
index 17870c9..280b131 100644
1280ab
--- a/asmcomp/compilenv.ml
1280ab
+++ b/asmcomp/compilenv.ml
1280ab
@@ -83,6 +83,15 @@ let make_symbol ?(unitname = current_unit.ui_symbol) idopt =
1280ab
   | None -> prefix
1280ab
   | Some id -> prefix ^ "__" ^ id
1280ab
 
1280ab
+let symbol_in_current_unit name =
1280ab
+  let prefix = "caml" ^ current_unit.ui_symbol in
1280ab
+  name = prefix || 
1280ab
+  (let lp = String.length prefix in
1280ab
+   String.length name >= 2 + lp
1280ab
+   && String.sub name 0 lp = prefix
1280ab
+   && name.[lp] = '_'
1280ab
+   && name.[lp + 1] = '_')
1280ab
+
1280ab
 let read_unit_info filename =
1280ab
   let ic = open_in_bin filename in
1280ab
   try
1280ab
diff --git a/asmcomp/compilenv.mli b/asmcomp/compilenv.mli
1280ab
index 51cb8c6..9ffb145 100644
1280ab
--- a/asmcomp/compilenv.mli
1280ab
+++ b/asmcomp/compilenv.mli
1280ab
@@ -31,6 +31,10 @@ val make_symbol: ?unitname:string -> string option -> string
1280ab
            corresponds to symbol [id] in the compilation unit [u]
1280ab
            (or the current unit). *)
1280ab
 
1280ab
+val symbol_in_current_unit: string -> bool
1280ab
+        (* Return true if the given asm symbol belongs to the
1280ab
+           current compilation unit, false otherwise. *)
1280ab
+
1280ab
 val symbol_for_global: Ident.t -> string
1280ab
         (* Return the asm symbol that refers to the given global identifier *)
1280ab
 
1280ab
diff --git a/asmrun/arm64.S b/asmrun/arm64.S
1280ab
new file mode 100644
1280ab
index 0000000..de670e6
1280ab
--- /dev/null
1280ab
+++ b/asmrun/arm64.S
1280ab
@@ -0,0 +1,535 @@
1280ab
+/***********************************************************************/
1280ab
+/*                                                                     */
1280ab
+/*                                OCaml                                */
1280ab
+/*                                                                     */
1280ab
+/*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         */
1280ab
+/*                                                                     */
1280ab
+/*  Copyright 2013 Institut National de Recherche en Informatique et   */
1280ab
+/*  en Automatique.  All rights reserved.  This file is distributed    */
1280ab
+/*  under the terms of the GNU Library General Public License, with    */
1280ab
+/*  the special exception on linking described in file ../LICENSE.     */
1280ab
+/*                                                                     */
1280ab
+/***********************************************************************/
1280ab
+
1280ab
+/* Asm part of the runtime system, ARM processor, 64-bit mode */
1280ab
+/* Must be preprocessed by cpp */
1280ab
+
1280ab
+/* Special registers */
1280ab
+
1280ab
+#define TRAP_PTR x26
1280ab
+#define ALLOC_PTR x27
1280ab
+#define ALLOC_LIMIT x28
1280ab
+#define ARG x15
1280ab
+#define TMP x16
1280ab
+#define TMP2 x17
1280ab
+
1280ab
+/* Support for CFI directives */
1280ab
+
1280ab
+#if defined(ASM_CFI_SUPPORTED)
1280ab
+#define CFI_STARTPROC .cfi_startproc
1280ab
+#define CFI_ENDPROC .cfi_endproc
1280ab
+#define CFI_ADJUST(n) .cfi_adjust_cfa_offset n
1280ab
+#else
1280ab
+#define CFI_STARTPROC
1280ab
+#define CFI_ENDPROC
1280ab
+#define CFI_ADJUST(n)
1280ab
+#endif
1280ab
+
1280ab
+/* Support for profiling with gprof */
1280ab
+
1280ab
+#define PROFILE
1280ab
+
1280ab
+/* Macros to load and store global variables.  Destroy TMP2 */
1280ab
+
1280ab
+#if defined(__PIC__)
1280ab
+
1280ab
+#define ADDRGLOBAL(reg,symb) \
1280ab
+	adrp	TMP2, :got:symb; \
1280ab
+	ldr	reg, [TMP2, #:got_lo12:symb]
1280ab
+
1280ab
+#define LOADGLOBAL(reg,symb) \
1280ab
+	ADDRGLOBAL(TMP2,symb); \
1280ab
+	ldr	reg, [TMP2]
1280ab
+
1280ab
+#define STOREGLOBAL(reg,symb) \
1280ab
+	ADDRGLOBAL(TMP2,symb); \
1280ab
+	str	reg, [TMP2]
1280ab
+
1280ab
+#else
1280ab
+
1280ab
+#define ADDRGLOBAL(reg,symb) \
1280ab
+	adrp	reg, symb; \
1280ab
+	add	reg, reg, #:lo12:symb
1280ab
+
1280ab
+#define LOADGLOBAL(reg,symb) \
1280ab
+	adrp	TMP2, symb; \
1280ab
+	ldr	reg, [TMP2, #:lo12:symb]
1280ab
+
1280ab
+#define STOREGLOBAL(reg,symb) \
1280ab
+	adrp	TMP2, symb; \
1280ab
+	str	reg, [TMP2, #:lo12:symb]
1280ab
+
1280ab
+#endif
1280ab
+
1280ab
+/* Allocation functions and GC interface */
1280ab
+
1280ab
+        .globl  caml_system__code_begin
1280ab
+caml_system__code_begin:
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_call_gc
1280ab
+caml_call_gc:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Record return address */
1280ab
+	STOREGLOBAL(x30, caml_last_return_address)
1280ab
+.Lcaml_call_gc:
1280ab
+    /* Record lowest stack address */
1280ab
+	mov	TMP, sp
1280ab
+	STOREGLOBAL(TMP, caml_bottom_of_stack)
1280ab
+    /* Set up stack space, saving return address and frame pointer */
1280ab
+    /* (2 regs RA/GP, 24 allocatable int regs, 24 caller-save float regs) * 8 */
1280ab
+        stp     x29, x30, [sp, -400]!
1280ab
+        CFI_ADJUST(400)
1280ab
+        add     x29, sp, #0
1280ab
+    /* Save allocatable integer registers on the stack, in the order
1280ab
+       given in proc.ml */
1280ab
+	stp	x0, x1, [sp, 16]
1280ab
+        stp     x2, x3, [sp, 32]
1280ab
+        stp     x4, x5, [sp, 48]
1280ab
+        stp     x6, x7, [sp, 64]
1280ab
+        stp     x8, x9, [sp, 80]
1280ab
+        stp     x10, x11, [sp, 96]
1280ab
+        stp     x12, x13, [sp, 112]
1280ab
+        stp     x14, x15, [sp, 128]
1280ab
+        stp     x19, x20, [sp, 144]
1280ab
+        stp     x21, x22, [sp, 160]
1280ab
+        stp     x23, x24, [sp, 176]
1280ab
+        str     x25, [sp, 192]
1280ab
+     /* Save caller-save floating-point registers on the stack
1280ab
+        (callee-saves are preserved by caml_garbage_collection) */
1280ab
+        stp     d0, d1, [sp, 208]
1280ab
+        stp     d2, d3, [sp, 224]
1280ab
+        stp     d4, d5, [sp, 240]
1280ab
+        stp     d6, d7, [sp, 256]
1280ab
+        stp     d16, d17, [sp, 272]
1280ab
+        stp     d18, d19, [sp, 288]
1280ab
+        stp     d20, d21, [sp, 304]
1280ab
+        stp     d22, d23, [sp, 320]
1280ab
+        stp     d24, d25, [sp, 336]
1280ab
+        stp     d26, d27, [sp, 352]
1280ab
+        stp     d28, d29, [sp, 368]
1280ab
+        stp     d30, d31, [sp, 384]
1280ab
+    /* Store pointer to saved integer registers in caml_gc_regs */
1280ab
+	add	TMP, sp, #16
1280ab
+	STOREGLOBAL(TMP, caml_gc_regs)
1280ab
+    /* Save current allocation pointer for debugging purposes */
1280ab
+	STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
1280ab
+    /* Save trap pointer in case an exception is raised during GC */
1280ab
+	STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
1280ab
+    /* Call the garbage collector */
1280ab
+        bl      caml_garbage_collection
1280ab
+    /* Restore registers */
1280ab
+	ldp	x0, x1, [sp, 16]
1280ab
+        ldp     x2, x3, [sp, 32]
1280ab
+        ldp     x4, x5, [sp, 48]
1280ab
+        ldp     x6, x7, [sp, 64]
1280ab
+        ldp     x8, x9, [sp, 80]
1280ab
+        ldp     x10, x11, [sp, 96]
1280ab
+        ldp     x12, x13, [sp, 112]
1280ab
+        ldp     x14, x15, [sp, 128]
1280ab
+        ldp     x19, x20, [sp, 144]
1280ab
+        ldp     x21, x22, [sp, 160]
1280ab
+        ldp     x23, x24, [sp, 176]
1280ab
+        ldr     x25, [sp, 192]
1280ab
+        ldp     d0, d1, [sp, 208]
1280ab
+        ldp     d2, d3, [sp, 224]
1280ab
+        ldp     d4, d5, [sp, 240]
1280ab
+        ldp     d6, d7, [sp, 256]
1280ab
+        ldp     d16, d17, [sp, 272]
1280ab
+        ldp     d18, d19, [sp, 288]
1280ab
+        ldp     d20, d21, [sp, 304]
1280ab
+        ldp     d22, d23, [sp, 320]
1280ab
+        ldp     d24, d25, [sp, 336]
1280ab
+        ldp     d26, d27, [sp, 352]
1280ab
+        ldp     d28, d29, [sp, 368]
1280ab
+        ldp     d30, d31, [sp, 384]
1280ab
+    /* Reload new allocation pointer and allocation limit */
1280ab
+	LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
1280ab
+	LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
1280ab
+    /* Free stack space and return to caller */
1280ab
+	ldp     x29, x30, [sp], 400
1280ab
+        ret
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_call_gc, %function
1280ab
+        .size   caml_call_gc, .-caml_call_gc
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_alloc1
1280ab
+caml_alloc1:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+1:      sub     ALLOC_PTR, ALLOC_PTR, #16
1280ab
+        cmp     ALLOC_PTR, ALLOC_LIMIT
1280ab
+        b.lo    2f
1280ab
+        ret
1280ab
+2:      stp     x29, x30, [sp, -16]!
1280ab
+        CFI_ADJUST(16)
1280ab
+        add     x29, sp, #0
1280ab
+    /* Record return address */
1280ab
+	STOREGLOBAL(x30, caml_last_return_address)
1280ab
+    /* Call GC */
1280ab
+        bl      .Lcaml_call_gc
1280ab
+    /* Restore return address */
1280ab
+	ldp     x29, x30, [sp], 16
1280ab
+        CFI_ADJUST(-16)
1280ab
+    /* Try again */
1280ab
+        b       1b
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_alloc1, %function
1280ab
+        .size   caml_alloc1, .-caml_alloc1
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_alloc2
1280ab
+caml_alloc2:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+1:      sub     ALLOC_PTR, ALLOC_PTR, #24
1280ab
+        cmp     ALLOC_PTR, ALLOC_LIMIT
1280ab
+        b.lo    2f
1280ab
+        ret
1280ab
+2:      stp     x29, x30, [sp, -16]!
1280ab
+        CFI_ADJUST(16)
1280ab
+        add     x29, sp, #0
1280ab
+    /* Record return address */
1280ab
+	STOREGLOBAL(x30, caml_last_return_address)
1280ab
+    /* Call GC */
1280ab
+        bl      .Lcaml_call_gc
1280ab
+    /* Restore return address */
1280ab
+	ldp     x29, x30, [sp], 16
1280ab
+        CFI_ADJUST(-16)
1280ab
+    /* Try again */
1280ab
+        b       1b
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_alloc2, %function
1280ab
+        .size   caml_alloc2, .-caml_alloc2
1280ab
+	
1280ab
+        .align  2
1280ab
+        .globl  caml_alloc3
1280ab
+caml_alloc3:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+1:      sub     ALLOC_PTR, ALLOC_PTR, #32
1280ab
+        cmp     ALLOC_PTR, ALLOC_LIMIT
1280ab
+        b.lo    2f
1280ab
+        ret
1280ab
+2:      stp     x29, x30, [sp, -16]!
1280ab
+        CFI_ADJUST(16)
1280ab
+        add     x29, sp, #0
1280ab
+    /* Record return address */
1280ab
+	STOREGLOBAL(x30, caml_last_return_address)
1280ab
+    /* Call GC */
1280ab
+        bl      .Lcaml_call_gc
1280ab
+    /* Restore return address */
1280ab
+	ldp     x29, x30, [sp], 16
1280ab
+        CFI_ADJUST(-16)
1280ab
+    /* Try again */
1280ab
+        b       1b
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_alloc2, %function
1280ab
+        .size   caml_alloc2, .-caml_alloc2
1280ab
+	
1280ab
+        .align  2
1280ab
+        .globl  caml_allocN
1280ab
+caml_allocN:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+1:      sub     ALLOC_PTR, ALLOC_PTR, ARG
1280ab
+        cmp     ALLOC_PTR, ALLOC_LIMIT
1280ab
+        b.lo    2f
1280ab
+        ret
1280ab
+2:      stp     x29, x30, [sp, -16]!
1280ab
+        CFI_ADJUST(16)
1280ab
+        add     x29, sp, #0
1280ab
+    /* Record return address */
1280ab
+	STOREGLOBAL(x30, caml_last_return_address)
1280ab
+    /* Call GC.  This preserves ARG */
1280ab
+        bl      .Lcaml_call_gc
1280ab
+    /* Restore return address */
1280ab
+	ldp     x29, x30, [sp], 16
1280ab
+        CFI_ADJUST(-16)
1280ab
+    /* Try again */
1280ab
+        b       1b
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_allocN, %function
1280ab
+        .size   caml_allocN, .-caml_allocN
1280ab
+
1280ab
+/* Call a C function from OCaml */
1280ab
+/* Function to call is in ARG */
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_c_call
1280ab
+caml_c_call:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Preserve return address in callee-save register x19 */
1280ab
+        mov     x19, x30
1280ab
+    /* Record lowest stack address and return address */
1280ab
+        STOREGLOBAL(x30, caml_last_return_address)
1280ab
+        add     TMP, sp, #0
1280ab
+        STOREGLOBAL(TMP, caml_bottom_of_stack)
1280ab
+    /* Make the exception handler alloc ptr available to the C code */
1280ab
+	STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
1280ab
+        STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
1280ab
+    /* Call the function */
1280ab
+        blr     ARG
1280ab
+    /* Reload alloc ptr and alloc limit */
1280ab
+	LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
1280ab
+        LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
1280ab
+    /* Return */
1280ab
+        ret     x19
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_c_call, %function
1280ab
+        .size   caml_c_call, .-caml_c_call
1280ab
+
1280ab
+/* Start the OCaml program */
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_start_program
1280ab
+caml_start_program:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+	ADDRGLOBAL(ARG, caml_program)
1280ab
+
1280ab
+/* Code shared with caml_callback* */
1280ab
+/* Address of OCaml code to call is in ARG */
1280ab
+/* Arguments to the OCaml code are in x0...x7 */
1280ab
+
1280ab
+.Ljump_to_caml:
1280ab
+    /* Set up stack frame and save callee-save registers */
1280ab
+	stp     x29, x30, [sp, -160]!
1280ab
+        CFI_ADJUST(160)
1280ab
+	add     x29, sp, #0
1280ab
+        stp     x19, x20, [sp, 16]
1280ab
+        stp     x21, x22, [sp, 32]
1280ab
+        stp     x23, x24, [sp, 48]
1280ab
+        stp     x25, x26, [sp, 64]
1280ab
+        stp     x27, x28, [sp, 80]
1280ab
+	stp     d8, d9, [sp, 96]
1280ab
+        stp     d10, d11, [sp, 112]
1280ab
+        stp     d12, d13, [sp, 128]
1280ab
+        stp     d14, d15, [sp, 144]
1280ab
+    /* Setup a callback link on the stack */
1280ab
+	LOADGLOBAL(x8, caml_bottom_of_stack)
1280ab
+        LOADGLOBAL(x9, caml_last_return_address)
1280ab
+        LOADGLOBAL(x10, caml_gc_regs)
1280ab
+        stp     x8, x9, [sp, -32]!     /* 16-byte alignment */
1280ab
+        CFI_ADJUST(32)
1280ab
+        str     x10, [sp, 16]
1280ab
+    /* Setup a trap frame to catch exceptions escaping the OCaml code */
1280ab
+	LOADGLOBAL(x8, caml_exception_pointer)
1280ab
+        adr     x9, .Ltrap_handler
1280ab
+        stp     x8, x9, [sp, -16]!
1280ab
+        CFI_ADJUST(16)
1280ab
+        add     TRAP_PTR, sp, #0
1280ab
+    /* Reload allocation pointers */
1280ab
+	LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
1280ab
+        LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
1280ab
+    /* Call the OCaml code */
1280ab
+        blr     ARG
1280ab
+.Lcaml_retaddr:
1280ab
+    /* Pop the trap frame, restoring caml_exception_pointer */
1280ab
+	ldr     x8, [sp], 16
1280ab
+        CFI_ADJUST(-16)
1280ab
+        STOREGLOBAL(x8, caml_exception_pointer)
1280ab
+    /* Pop the callback link, restoring the global variables */
1280ab
+.Lreturn_result:
1280ab
+	ldr     x10, [sp, 16]
1280ab
+        ldp     x8, x9, [sp], 32
1280ab
+        CFI_ADJUST(-32)
1280ab
+	STOREGLOBAL(x8, caml_bottom_of_stack)
1280ab
+        STOREGLOBAL(x9, caml_last_return_address)
1280ab
+        STOREGLOBAL(x10, caml_gc_regs)
1280ab
+    /* Update allocation pointer */
1280ab
+	STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
1280ab
+    /* Reload callee-save registers and return address */
1280ab
+        ldp     x19, x20, [sp, 16]
1280ab
+        ldp     x21, x22, [sp, 32]
1280ab
+        ldp     x23, x24, [sp, 48]
1280ab
+        ldp     x25, x26, [sp, 64]
1280ab
+        ldp     x27, x28, [sp, 80]
1280ab
+	ldp     d8, d9, [sp, 96]
1280ab
+        ldp     d10, d11, [sp, 112]
1280ab
+        ldp     d12, d13, [sp, 128]
1280ab
+        ldp     d14, d15, [sp, 144]
1280ab
+	ldp     x29, x30, [sp], 160
1280ab
+        CFI_ADJUST(-160)
1280ab
+    /* Return to C caller */
1280ab
+        ret
1280ab
+        CFI_ENDPROC
1280ab
+        .type   .Lcaml_retaddr, %function
1280ab
+        .size   .Lcaml_retaddr, .-.Lcaml_retaddr
1280ab
+        .type   caml_start_program, %function
1280ab
+        .size   caml_start_program, .-caml_start_program
1280ab
+
1280ab
+/* The trap handler */
1280ab
+
1280ab
+        .align  2
1280ab
+.Ltrap_handler:
1280ab
+        CFI_STARTPROC
1280ab
+    /* Save exception pointer */
1280ab
+        STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
1280ab
+    /* Encode exception bucket as an exception result */
1280ab
+        orr     x0, x0, #2
1280ab
+    /* Return it */
1280ab
+        b       .Lreturn_result
1280ab
+        CFI_ENDPROC
1280ab
+        .type   .Ltrap_handler, %function
1280ab
+        .size   .Ltrap_handler, .-.Ltrap_handler
1280ab
+
1280ab
+/* Raise an exception from OCaml */
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_raise_exn
1280ab
+caml_raise_exn:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Test if backtrace is active */
1280ab
+        LOADGLOBAL(TMP, caml_backtrace_active)
1280ab
+        cbnz     TMP, 2f
1280ab
+1:  /* Cut stack at current trap handler */
1280ab
+        mov     sp, TRAP_PTR
1280ab
+    /* Pop previous handler and jump to it */
1280ab
+	ldr     TMP, [sp, 8]
1280ab
+        ldr     TRAP_PTR, [sp], 16
1280ab
+        br      TMP
1280ab
+2:  /* Preserve exception bucket in callee-save register x19 */
1280ab
+        mov     x19, x0
1280ab
+    /* Stash the backtrace */
1280ab
+                               /* arg1: exn bucket, already in x0 */
1280ab
+        mov     x1, x30        /* arg2: pc of raise */
1280ab
+        add     x2, sp, #0     /* arg3: sp of raise */
1280ab
+        mov     x3, TRAP_PTR   /* arg4: sp of handler */
1280ab
+        bl      caml_stash_backtrace
1280ab
+    /* Restore exception bucket and raise */
1280ab
+        mov     x0, x19
1280ab
+	b       1b
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_raise_exn, %function
1280ab
+        .size   caml_raise_exn, .-caml_raise_exn
1280ab
+
1280ab
+/* Raise an exception from C */
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_raise_exception
1280ab
+caml_raise_exception:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Reload trap ptr, alloc ptr and alloc limit */
1280ab
+        LOADGLOBAL(TRAP_PTR, caml_exception_pointer)
1280ab
+        LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
1280ab
+        LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
1280ab
+    /* Test if backtrace is active */
1280ab
+	LOADGLOBAL(TMP, caml_backtrace_active)
1280ab
+        cbnz    TMP, 2f
1280ab
+1:  /* Cut stack at current trap handler */
1280ab
+        mov     sp, TRAP_PTR
1280ab
+    /* Pop previous handler and jump to it */
1280ab
+	ldr     TMP, [sp, 8]
1280ab
+        ldr     TRAP_PTR, [sp], 16
1280ab
+        br      TMP
1280ab
+2:  /* Preserve exception bucket in callee-save register x19 */
1280ab
+        mov     x19, x0
1280ab
+    /* Stash the backtrace */
1280ab
+                               /* arg1: exn bucket, already in x0 */
1280ab
+        LOADGLOBAL(x1, caml_last_return_address)   /* arg2: pc of raise */
1280ab
+	LOADGLOBAL(x2, caml_bottom_of_stack)       /* arg3: sp of raise */
1280ab
+        mov     x3, TRAP_PTR   /* arg4: sp of handler */
1280ab
+        bl      caml_stash_backtrace
1280ab
+    /* Restore exception bucket and raise */
1280ab
+        mov     x0, x19
1280ab
+	b       1b
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_raise_exception, %function
1280ab
+        .size   caml_raise_exception, .-caml_raise_exception
1280ab
+
1280ab
+/* Callback from C to OCaml */
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_callback_exn
1280ab
+caml_callback_exn:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Initial shuffling of arguments (x0 = closure, x1 = first arg) */
1280ab
+	mov     TMP, x0
1280ab
+        mov     x0, x1          /* x0 = first arg */
1280ab
+        mov     x1, TMP         /* x1 = closure environment */
1280ab
+        ldr     ARG, [TMP]      /* code pointer */
1280ab
+        b       .Ljump_to_caml
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_callback_exn, %function
1280ab
+        .size   caml_callback_exn, .-caml_callback_exn
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_callback2_exn
1280ab
+caml_callback2_exn:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Initial shuffling of arguments (x0 = closure, x1 = arg1, x2 = arg2) */
1280ab
+	mov     TMP, x0
1280ab
+        mov     x0, x1          /* x0 = first arg */
1280ab
+	mov     x1, x2          /* x1 = second arg
1280ab
+        mov     x2, TMP         /* x2 = closure environment */
1280ab
+	ADDRGLOBAL(ARG, caml_apply2)
1280ab
+        b       .Ljump_to_caml
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_callback2_exn, %function
1280ab
+        .size   caml_callback2_exn, .-caml_callback2_exn
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_callback3_exn
1280ab
+caml_callback3_exn:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Initial shuffling of arguments */
1280ab
+    /* (x0 = closure, x1 = arg1, x2 = arg2, x3 = arg3) */
1280ab
+        mov     TMP, x0
1280ab
+        mov     x0, x1          /* x0 = first arg */
1280ab
+        mov     x1, x2          /* x1 = second arg */
1280ab
+        mov     x2, x3          /* x2 = third arg */
1280ab
+        mov     x3, TMP         /* x3 = closure environment */
1280ab
+	ADDRGLOBAL(ARG, caml_apply3)
1280ab
+        b       .Ljump_to_caml
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_callback3_exn, %function
1280ab
+        .size   caml_callback3_exn, .-caml_callback3_exn
1280ab
+
1280ab
+        .align  2
1280ab
+        .globl  caml_ml_array_bound_error
1280ab
+caml_ml_array_bound_error:
1280ab
+        CFI_STARTPROC
1280ab
+        PROFILE
1280ab
+    /* Load address of [caml_array_bound_error] in ARG */
1280ab
+        ADDRGLOBAL(ARG, caml_array_bound_error)
1280ab
+    /* Call that function */
1280ab
+        b       caml_c_call
1280ab
+        CFI_ENDPROC
1280ab
+        .type   caml_ml_array_bound_error, %function
1280ab
+        .size   caml_ml_array_bound_error, .-caml_ml_array_bound_error
1280ab
+
1280ab
+        .globl  caml_system__code_end
1280ab
+caml_system__code_end:
1280ab
+
1280ab
+/* GC roots for callback */
1280ab
+
1280ab
+        .data
1280ab
+        .align  3
1280ab
+        .globl  caml_system__frametable
1280ab
+caml_system__frametable:
1280ab
+        .quad   1               /* one descriptor */
1280ab
+        .quad   .Lcaml_retaddr  /* return address into callback */
1280ab
+        .short  -1              /* negative frame size => use callback link */
1280ab
+        .short  0               /* no roots */
1280ab
+        .align  3
1280ab
+        .type   caml_system__frametable, %object
1280ab
+        .size   caml_system__frametable, .-caml_system__frametable
1280ab
diff --git a/asmrun/signals_osdep.h b/asmrun/signals_osdep.h
1280ab
index ff19847..68ec837 100644
1280ab
--- a/asmrun/signals_osdep.h
1280ab
+++ b/asmrun/signals_osdep.h
1280ab
@@ -92,6 +92,25 @@
1280ab
   #define CONTEXT_YOUNG_PTR (context->uc_mcontext.arm_r8)
1280ab
   #define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address)
1280ab
 
1280ab
+/****************** ARM64, Linux */
1280ab
+
1280ab
+#elif defined(TARGET_arm64) && defined(SYS_linux)
1280ab
+
1280ab
+  #include <sys/ucontext.h>
1280ab
+
1280ab
+  #define DECLARE_SIGNAL_HANDLER(name) \
1280ab
+    static void name(int sig, siginfo_t * info, ucontext_t * context)
1280ab
+
1280ab
+  #define SET_SIGACT(sigact,name) \
1280ab
+     sigact.sa_sigaction = (void (*)(int,siginfo_t *,void *)) (name); \
1280ab
+     sigact.sa_flags = SA_SIGINFO
1280ab
+
1280ab
+  typedef unsigned long context_reg;
1280ab
+  #define CONTEXT_PC (context->uc_mcontext.pc)
1280ab
+  #define CONTEXT_EXCEPTION_POINTER (context->uc_mcontext.regs[26])
1280ab
+  #define CONTEXT_YOUNG_PTR (context->uc_mcontext.regs[27])
1280ab
+  #define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address)
1280ab
+
1280ab
 /****************** AMD64, Solaris x86 */
1280ab
 
1280ab
 #elif defined(TARGET_amd64) && defined (SYS_solaris)
1280ab
diff --git a/asmrun/stack.h b/asmrun/stack.h
1280ab
index 756db95..031e408 100644
1280ab
--- a/asmrun/stack.h
1280ab
+++ b/asmrun/stack.h
1280ab
@@ -65,6 +65,11 @@
1280ab
 #define Callback_link(sp) ((struct caml_context *)((sp) + 16))
1280ab
 #endif
1280ab
 
1280ab
+#ifdef TARGET_arm64
1280ab
+#define Saved_return_address(sp) *((intnat *)((sp) - 8))
1280ab
+#define Callback_link(sp) ((struct caml_context *)((sp) + 16))
1280ab
+#endif
1280ab
+
1280ab
 /* Structure of OCaml callback contexts */
1280ab
 
1280ab
 struct caml_context {
1280ab
diff --git a/byterun/interp.c b/byterun/interp.c
1280ab
index b99ed2f..af9fa0f 100644
1280ab
--- a/byterun/interp.c
1280ab
+++ b/byterun/interp.c
1280ab
@@ -173,6 +173,12 @@ sp is a local copy of the global variable caml_extern_sp. */
1280ab
 #define SP_REG asm("%r14")
1280ab
 #define ACCU_REG asm("%r13")
1280ab
 #endif
1280ab
+#ifdef __aarch64__
1280ab
+#define PC_REG asm("%x19")
1280ab
+#define SP_REG asm("%x20")
1280ab
+#define ACCU_REG asm("%x21")
1280ab
+#define JUMPTBL_BASE_REG asm("%x22")
1280ab
+#endif
1280ab
 #endif
1280ab
 
1280ab
 /* Division and modulus madness */
1280ab
diff --git a/configure b/configure
1280ab
index 9bb9e9e..a0e1466 100755
1280ab
--- a/configure
1280ab
+++ b/configure
1280ab
@@ -661,6 +661,7 @@ if test $withsharedlibs = "yes"; then
1280ab
     x86_64-*-netbsd*)             natdynlink=true;;
1280ab
     i386-*-gnu0.3)                natdynlink=true;;
1280ab
     arm*-*-linux*)                natdynlink=true;;
1280ab
+    aarch64-*-linux*)             natdynlink=true;;
1280ab
   esac
1280ab
 fi
1280ab
 
1280ab
@@ -719,6 +720,7 @@ case "$host" in
1280ab
   x86_64-*-netbsd*)             arch=amd64; system=netbsd;;
1280ab
   x86_64-*-openbsd*)            arch=amd64; system=openbsd;;
1280ab
   x86_64-*-darwin*)             arch=amd64; system=macosx;;
1280ab
+  aarch64-*-linux*)             arch=arm64; system=linux;;
1280ab
   x86_64-*-cygwin*)             arch=amd64; system=cygwin;;
1280ab
 esac
1280ab
 
1280ab
@@ -772,7 +774,7 @@ case "$arch,$model,$system" in
1280ab
                     aspp='gcc -m64 -c';;
1280ab
   amd64,*,*)        as='as'
1280ab
                     aspp='gcc -c';;
1280ab
-  arm,*,*)          as='as';
1280ab
+  arm,*,*|arm64,*,*)as='as';
1280ab
                     aspp='gcc -c';;
1280ab
   i386,*,solaris)   as='as'
1280ab
                     aspp='/usr/ccs/bin/as -P';;
1280ab
@@ -1198,6 +1200,7 @@ case "$arch" in
1280ab
             fi;;
1280ab
   power)    bng_arch=ppc; bng_asm_level=1;;
1280ab
   amd64)    bng_arch=amd64; bng_asm_level=1;;
1280ab
+  arm64)    bng_arch=arm64; bng_asm_level=1;;
1280ab
   *)        bng_arch=generic; bng_asm_level=0;;
1280ab
 esac
1280ab
 
1280ab
diff --git a/otherlibs/num/bng.c b/otherlibs/num/bng.c
1280ab
index 5bbedb0..0483ef5 100644
1280ab
--- a/otherlibs/num/bng.c
1280ab
+++ b/otherlibs/num/bng.c
1280ab
@@ -23,12 +23,10 @@
1280ab
 #include "bng_amd64.c"
1280ab
 #elif defined(BNG_ARCH_ppc)
1280ab
 #include "bng_ppc.c"
1280ab
-#elif defined (BNG_ARCH_alpha)
1280ab
-#include "bng_alpha.c"
1280ab
 #elif defined (BNG_ARCH_sparc)
1280ab
 #include "bng_sparc.c"
1280ab
-#elif defined (BNG_ARCH_mips)
1280ab
-#include "bng_mips.c"
1280ab
+#elif defined (BNG_ARCH_arm64)
1280ab
+#include "bng_arm64.c"
1280ab
 #endif
1280ab
 #endif
1280ab
 
1280ab
diff --git a/otherlibs/num/bng_arm64.c b/otherlibs/num/bng_arm64.c
1280ab
new file mode 100644
1280ab
index 0000000..50843a0
1280ab
--- /dev/null
1280ab
+++ b/otherlibs/num/bng_arm64.c
1280ab
@@ -0,0 +1,20 @@
1280ab
+/***********************************************************************/
1280ab
+/*                                                                     */
1280ab
+/*                                OCaml                                */
1280ab
+/*                                                                     */
1280ab
+/*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         */
1280ab
+/*                                                                     */
1280ab
+/*  Copyright 2013 Institut National de Recherche en Informatique et   */
1280ab
+/*  en Automatique.  All rights reserved.  This file is distributed    */
1280ab
+/*  under the terms of the GNU Library General Public License, with    */
1280ab
+/*  the special exception on linking described in file ../../LICENSE.  */
1280ab
+/*                                                                     */
1280ab
+/***********************************************************************/
1280ab
+
1280ab
+/* Code specific for the ARM 64 (AArch64) architecture */
1280ab
+
1280ab
+#define BngMult(resh,resl,arg1,arg2)                                        \
1280ab
+  asm("mul %0, %2, %3 \n\t"                                                 \
1280ab
+      "umulh %1, %2, %3"                                                    \
1280ab
+      : "=&r" (resl), "=&r" (resh)                                          \
1280ab
+      : "r" (arg1), "r" (arg2))
1280ab
diff --git a/testsuite/tests/asmcomp/Makefile b/testsuite/tests/asmcomp/Makefile
1280ab
index 98c2e4c..15fcb7c 100644
1280ab
--- a/testsuite/tests/asmcomp/Makefile
1280ab
+++ b/testsuite/tests/asmcomp/Makefile
1280ab
@@ -128,7 +128,7 @@ parsecmm.mli parsecmm.ml: parsecmm.mly
1280ab
 lexcmm.ml: lexcmm.mll
1280ab
 	@$(OCAMLLEX) -q lexcmm.mll
1280ab
 
1280ab
-CASES=fib tak quicksort quicksort2 soli \
1280ab
+CASES=fib tak quicksort quicksort2 soli integr \
1280ab
       arith checkbound tagged-fib tagged-integr tagged-quicksort tagged-tak
1280ab
 ARGS_fib=-DINT_INT -DFUN=fib main.c
1280ab
 ARGS_tak=-DUNIT_INT -DFUN=takmain main.c
1280ab
diff --git a/testsuite/tests/asmcomp/arm64.S b/testsuite/tests/asmcomp/arm64.S
1280ab
new file mode 100644
1280ab
index 0000000..3bb4110
1280ab
--- /dev/null
1280ab
+++ b/testsuite/tests/asmcomp/arm64.S
1280ab
@@ -0,0 +1,52 @@
1280ab
+/***********************************************************************/
1280ab
+/*                                                                     */
1280ab
+/*                                OCaml                                */
1280ab
+/*                                                                     */
1280ab
+/*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         */
1280ab
+/*                                                                     */
1280ab
+/*  Copyright 2013 Institut National de Recherche en Informatique et   */
1280ab
+/*  en Automatique.  All rights reserved.  This file is distributed    */
1280ab
+/*  under the terms of the Q Public License version 1.0.               */
1280ab
+/*                                                                     */
1280ab
+/***********************************************************************/
1280ab
+
1280ab
+        .globl  call_gen_code
1280ab
+        .align  2
1280ab
+call_gen_code:  
1280ab
+    /* Set up stack frame and save callee-save registers */
1280ab
+	stp     x29, x30, [sp, -160]!
1280ab
+	add     x29, sp, #0
1280ab
+        stp     x19, x20, [sp, 16]
1280ab
+        stp     x21, x22, [sp, 32]
1280ab
+        stp     x23, x24, [sp, 48]
1280ab
+        stp     x25, x26, [sp, 64]
1280ab
+        stp     x27, x28, [sp, 80]
1280ab
+	stp     d8, d9, [sp, 96]
1280ab
+        stp     d10, d11, [sp, 112]
1280ab
+        stp     d12, d13, [sp, 128]
1280ab
+        stp     d14, d15, [sp, 144]
1280ab
+    /* Shuffle arguments */
1280ab
+	mov     x8, x0
1280ab
+        mov     x0, x1
1280ab
+        mov     x1, x2
1280ab
+        mov     x2, x3
1280ab
+        mov     x3, x4
1280ab
+    /* Call generated asm */
1280ab
+        blr     x8
1280ab
+    /* Reload callee-save registers and return address */
1280ab
+        ldp     x19, x20, [sp, 16]
1280ab
+        ldp     x21, x22, [sp, 32]
1280ab
+        ldp     x23, x24, [sp, 48]
1280ab
+        ldp     x25, x26, [sp, 64]
1280ab
+        ldp     x27, x28, [sp, 80]
1280ab
+	ldp     d8, d9, [sp, 96]
1280ab
+        ldp     d10, d11, [sp, 112]
1280ab
+        ldp     d12, d13, [sp, 128]
1280ab
+        ldp     d14, d15, [sp, 144]
1280ab
+	ldp     x29, x30, [sp], 160
1280ab
+        ret
1280ab
+
1280ab
+        .globl  caml_c_call
1280ab
+        .align  2
1280ab
+caml_c_call:
1280ab
+        br	x15
1280ab
diff --git a/testsuite/tests/asmcomp/main.ml b/testsuite/tests/asmcomp/main.ml
1280ab
index d67a643..82b699e 100644
1280ab
--- a/testsuite/tests/asmcomp/main.ml
1280ab
+++ b/testsuite/tests/asmcomp/main.ml
1280ab
@@ -13,6 +13,7 @@
1280ab
 open Clflags
1280ab
 
1280ab
 let compile_file filename =
1280ab
+  Clflags.dlcode := false;
1280ab
   Compilenv.reset "test";
1280ab
   Emit.begin_assembly();
1280ab
   let ic = open_in filename in
1280ab
-- 
66bad7
1.8.3.1
1280ab