From 10d852d542f4ecdc5efc5afbae2d42167df4539c Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 18 Jul 2013 16:09:20 +0000 Subject: [PATCH 09/11] Port to the ARM 64-bits (AArch64) architecture (experimental). Merge of branch branches/arm64. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@13909 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 (cherry picked from commit 055d5c0379e42b4f561cb1fc5159659d8e9a7b6f) --- asmcomp/arm64/arch.ml | 146 ++++++++ asmcomp/arm64/emit.mlp | 742 +++++++++++++++++++++++++++++++++++++++ asmcomp/arm64/proc.ml | 212 +++++++++++ asmcomp/arm64/reload.ml | 16 + asmcomp/arm64/scheduling.ml | 18 + asmcomp/arm64/selection.ml | 265 ++++++++++++++ asmcomp/compilenv.ml | 9 + asmcomp/compilenv.mli | 4 + asmrun/arm64.S | 535 ++++++++++++++++++++++++++++ asmrun/signals_osdep.h | 19 + asmrun/stack.h | 5 + byterun/interp.c | 6 + configure | 5 +- otherlibs/num/bng.c | 6 +- otherlibs/num/bng_arm64.c | 20 ++ testsuite/tests/asmcomp/Makefile | 2 +- testsuite/tests/asmcomp/arm64.S | 52 +++ testsuite/tests/asmcomp/main.ml | 1 + 18 files changed, 2057 insertions(+), 6 deletions(-) create mode 100644 asmcomp/arm64/arch.ml create mode 100644 asmcomp/arm64/emit.mlp create mode 100644 asmcomp/arm64/proc.ml create mode 100644 asmcomp/arm64/reload.ml create mode 100644 asmcomp/arm64/scheduling.ml create mode 100644 asmcomp/arm64/selection.ml create mode 100644 asmrun/arm64.S create mode 100644 otherlibs/num/bng_arm64.c create mode 100644 testsuite/tests/asmcomp/arm64.S diff --git a/asmcomp/arm64/arch.ml b/asmcomp/arm64/arch.ml new file mode 100644 index 0000000..a53251f --- /dev/null +++ b/asmcomp/arm64/arch.ml @@ -0,0 +1,146 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* Benedikt Meurer, University of Siegen *) +(* *) +(* Copyright 2013 Institut National de Recherche en Informatique *) +(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *) +(* reserved. This file is distributed under the terms of the Q *) +(* Public License version 1.0. *) +(* *) +(***********************************************************************) + +let command_line_options = [] + +(* Specific operations for the ARM processor, 64-bit mode *) + +open Format + +let command_line_options = [] + +(* Addressing modes *) + +type addressing_mode = + | Iindexed of int (* reg + displ *) + | Ibased of string * int (* global var + displ *) + +(* We do not support the reg + shifted reg addressing mode, because + what we really need is reg + shifted reg + displ, + and this is decomposed in two instructions (reg + shifted reg -> tmp, + then addressing tmp + displ). *) + +(* Specific operations *) + +type specific_operation = + | Ishiftarith of arith_operation * int + | Ishiftcheckbound of int + | Imuladd (* multiply and add *) + | Imulsub (* multiply and subtract *) + | Inegmulf (* floating-point negate and multiply *) + | Imuladdf (* floating-point multiply and add *) + | Inegmuladdf (* floating-point negate, multiply and add *) + | Imulsubf (* floating-point multiply and subtract *) + | Inegmulsubf (* floating-point negate, multiply and subtract *) + | Isqrtf (* floating-point square root *) + | Ibswap of int (* endianess conversion *) + +and arith_operation = + Ishiftadd + | Ishiftsub + +(* Sizes, endianness *) + +let big_endian = false + +let size_addr = 8 +let size_int = 8 +let size_float = 8 + +let allow_unaligned_access = false + +(* Behavior of division *) + +let division_crashes_on_overflow = false + +(* Operations on addressing modes *) + +let identity_addressing = Iindexed 0 + +let offset_addressing addr delta = + match addr with + | Iindexed n -> Iindexed(n + delta) + | Ibased(s, n) -> Ibased(s, n + delta) + +let num_args_addressing = function + | Iindexed n -> 1 + | Ibased(s, n) -> 0 + +(* Printing operations and addressing modes *) + +let print_addressing printreg addr ppf arg = + match addr with + | Iindexed n -> + printreg ppf arg.(0); + if n <> 0 then fprintf ppf " + %i" n + | Ibased(s, 0) -> + fprintf ppf "\"%s\"" s + | Ibased(s, n) -> + fprintf ppf "\"%s\" + %i" s n + +let print_specific_operation printreg op ppf arg = + match op with + | Ishiftarith(op, shift) -> + let op_name = function + | Ishiftadd -> "+" + | Ishiftsub -> "-" in + let shift_mark = + if shift >= 0 + then sprintf "<< %i" shift + else sprintf ">> %i" (-shift) in + fprintf ppf "%a %s %a %s" + printreg arg.(0) (op_name op) printreg arg.(1) shift_mark + | Ishiftcheckbound n -> + fprintf ppf "check %a >> %i > %a" printreg arg.(0) n printreg arg.(1) + | Imuladd -> + fprintf ppf "(%a * %a) + %a" + printreg arg.(0) + printreg arg.(1) + printreg arg.(2) + | Imulsub -> + fprintf ppf "-(%a * %a) + %a" + printreg arg.(0) + printreg arg.(1) + printreg arg.(2) + | Inegmulf -> + fprintf ppf "-f (%a *f %a)" + printreg arg.(0) + printreg arg.(1) + | Imuladdf -> + fprintf ppf "%a +f (%a *f %a)" + printreg arg.(0) + printreg arg.(1) + printreg arg.(2) + | Inegmuladdf -> + fprintf ppf "(-f %a) -f (%a *f %a)" + printreg arg.(0) + printreg arg.(1) + printreg arg.(2) + | Imulsubf -> + fprintf ppf "%a -f (%a *f %a)" + printreg arg.(0) + printreg arg.(1) + printreg arg.(2) + | Inegmulsubf -> + fprintf ppf "(-f %a) +f (%a *f %a)" + printreg arg.(0) + printreg arg.(1) + printreg arg.(2) + | Isqrtf -> + fprintf ppf "sqrtf %a" + printreg arg.(0) + | Ibswap n -> + fprintf ppf "bswap%i %a" n + printreg arg.(0) + diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp new file mode 100644 index 0000000..fc9649c --- /dev/null +++ b/asmcomp/arm64/emit.mlp @@ -0,0 +1,742 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* Benedikt Meurer, University of Siegen *) +(* *) +(* Copyright 2013 Institut National de Recherche en Informatique *) +(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *) +(* reserved. This file is distributed under the terms of the Q *) +(* Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Emission of ARM assembly code, 64-bit mode *) + +open Misc +open Cmm +open Arch +open Proc +open Reg +open Mach +open Linearize +open Emitaux + +(* Tradeoff between code size and code speed *) + +let fastcode_flag = ref true + +(* Names for special regs *) + +let reg_trap_ptr = phys_reg 23 +let reg_alloc_ptr = phys_reg 24 +let reg_alloc_limit = phys_reg 25 +let reg_tmp1 = phys_reg 26 +let reg_tmp2 = phys_reg 27 +let reg_x15 = phys_reg 15 + +(* Output a label *) + +let emit_label lbl = + emit_string ".L"; emit_int lbl + +let emit_data_label lbl = + emit_string ".Ld"; emit_int lbl + +(* Symbols *) + +let emit_symbol s = + Emitaux.emit_symbol '$' s + +(* Output a pseudo-register *) + +let emit_reg = function + {loc = Reg r} -> emit_string (register_name r) + | _ -> fatal_error "Emit.emit_reg" + +(* Likewise, but with the 32-bit name of the register *) + +let int_reg_name_w = + [| "w0"; "w1"; "w2"; "w3"; "w4"; "w5"; "w6"; "w7"; + "w8"; "w9"; "w10"; "w11"; "w12"; "w13"; "w14"; "w15"; + "w19"; "w20"; "w21"; "w22"; "w23"; "w24"; "w25"; + "w26"; "w27"; "w28"; "w16"; "w17" |] + +let emit_wreg = function + {loc = Reg r} -> emit_string int_reg_name_w.(r) + | _ -> fatal_error "Emit.emit_wreg" + +(* Layout of the stack frame *) + +let stack_offset = ref 0 + +let frame_size () = + let sz = + !stack_offset + + 8 * num_stack_slots.(0) + + 8 * num_stack_slots.(1) + + (if !contains_calls then 8 else 0) + in Misc.align sz 16 + +let slot_offset loc cl = + match loc with + Incoming n -> + assert (n >= 0); + frame_size() + n + | Local n -> + !stack_offset + + (if cl = 0 + then n * 8 + else num_stack_slots.(0) * 8 + n * 8) + | Outgoing n -> + assert (n >= 0); + n + +(* Output a stack reference *) + +let emit_stack r = + match r.loc with + | Stack s -> + let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]` + | _ -> fatal_error "Emit.emit_stack" + +(* Output an addressing mode *) + +let emit_symbol_offset s ofs = + emit_symbol s; + if ofs > 0 then `+{emit_int ofs}` + else if ofs < 0 then `-{emit_int (-ofs)}` + else () + +let emit_addressing addr r = + match addr with + | Iindexed ofs -> + `[{emit_reg r}, #{emit_int ofs}]` + | Ibased(s, ofs) -> + `[{emit_reg r}, #:lo12:{emit_symbol_offset s ofs}]` + +(* Record live pointers at call points *) + +let record_frame_label live dbg = + let lbl = new_label() in + let live_offset = ref [] in + Reg.Set.iter + (function + {typ = Addr; loc = Reg r} -> + live_offset := ((r lsl 1) + 1) :: !live_offset + | {typ = Addr; loc = Stack s} as reg -> + live_offset := slot_offset s (register_class reg) :: !live_offset + | _ -> ()) + live; + frame_descriptors := + { fd_lbl = lbl; + fd_frame_size = frame_size(); + fd_live_offset = !live_offset; + fd_debuginfo = dbg } :: !frame_descriptors; + lbl + +let record_frame live dbg = + let lbl = record_frame_label live dbg in `{emit_label lbl}:` + +(* Record calls to the GC -- we've moved them out of the way *) + +type gc_call = + { gc_lbl: label; (* Entry label *) + gc_return_lbl: label; (* Where to branch after GC *) + gc_frame_lbl: label } (* Label of frame descriptor *) + +let call_gc_sites = ref ([] : gc_call list) + +let emit_call_gc gc = + `{emit_label gc.gc_lbl}: bl {emit_symbol "caml_call_gc"}\n`; + `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n` + +(* Record calls to caml_ml_array_bound_error. + In debug mode, we maintain one call to caml_ml_array_bound_error + per bound check site. Otherwise, we can share a single call. *) + +type bound_error_call = + { bd_lbl: label; (* Entry label *) + bd_frame_lbl: label } (* Label of frame descriptor *) + +let bound_error_sites = ref ([] : bound_error_call list) + +let bound_error_label dbg = + if !Clflags.debug || !bound_error_sites = [] then begin + let lbl_bound_error = new_label() in + let lbl_frame = record_frame_label Reg.Set.empty dbg in + bound_error_sites := + { bd_lbl = lbl_bound_error; + bd_frame_lbl = lbl_frame } :: !bound_error_sites; + lbl_bound_error + end else begin + let bd = List.hd !bound_error_sites in bd.bd_lbl + end + +let emit_call_bound_error bd = + `{emit_label bd.bd_lbl}: bl {emit_symbol "caml_ml_array_bound_error"}\n`; + `{emit_label bd.bd_frame_lbl}:\n` + +(* Names of various instructions *) + +let name_for_comparison = function + | Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le" + | Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt" + | Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls" + | Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi" + +let name_for_int_operation = function + | Iadd -> "add" + | Isub -> "sub" + | Imul -> "mul" + | Idiv -> "sdiv" + | Iand -> "and" + | Ior -> "orr" + | Ixor -> "eor" + | Ilsl -> "lsl" + | Ilsr -> "lsr" + | Iasr -> "asr" + | _ -> assert false + +(* Load an integer constant into a register *) + +let emit_intconst dst n = + let rec emit_pos first shift = + if shift < 0 then begin + if first then ` mov {emit_reg dst}, xzr\n` + end else begin + let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in + if s = 0n then emit_pos first (shift - 16) else begin + if first then + ` movz {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n` + else + ` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`; + emit_pos false (shift - 16) + end + end + and emit_neg first shift = + if shift < 0 then begin + if first then ` movn {emit_reg dst}, #0\n` + end else begin + let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in + if s = 0xFFFFn then emit_neg first (shift - 16) else begin + if first then + ` movn {emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n` + else + ` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`; + emit_neg false (shift - 16) + end + end + in + if n < 0n then emit_neg true 48 else emit_pos true 48 + +(* Recognize float constants appropriate for FMOV dst, #fpimm instruction: + "a normalized binary floating point encoding with 1 sign bit, 4 + bits of fraction and a 3-bit exponent" *) + +let is_immediate_float bits = + let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in + let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in + exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant + +(* Adjust sp (up or down) by the given byte amount *) + +let emit_stack_adjustment n = + let instr = if n < 0 then "sub" else "add" in + let m = abs n in + assert (m < 0x1_000_000); + let ml = m land 0xFFF and mh = m land 0xFFF_000 in + if mh <> 0 then ` {emit_string instr} sp, sp, #{emit_int mh}\n`; + if ml <> 0 then ` {emit_string instr} sp, sp, #{emit_int ml}\n`; + if n <> 0 then cfi_adjust_cfa_offset (-n) + +(* Deallocate the stack frame and reload the return address + before a return or tail call *) + +let output_epilogue f = + let n = frame_size() in + if !contains_calls then + ` ldr x30, [sp, #{emit_int (n-8)}]\n`; + if n > 0 then + emit_stack_adjustment n; + f(); + (* reset CFA back because function body may continue *) + if n > 0 then cfi_adjust_cfa_offset n + +(* Name of current function *) +let function_name = ref "" +(* Entry point for tail recursive calls *) +let tailrec_entry_point = ref 0 +(* Pending floating-point literals *) +let float_literals = ref ([] : (int64 * label) list) + +(* Label a floating-point literal *) +let float_literal f = + try + List.assoc f !float_literals + with Not_found -> + let lbl = new_label() in + float_literals := (f, lbl) :: !float_literals; + lbl + +(* Emit all pending literals *) +let emit_literals() = + if !float_literals <> [] then begin + ` .align 3\n`; + List.iter + (fun (f, lbl) -> + `{emit_label lbl}: .quad `; emit_printf "0x%Lx\n" f) + !float_literals; + float_literals := [] + end + +(* Emit code to load the address of a symbol *) + +let emit_load_symbol_addr dst s = + if (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit s then begin + ` adrp {emit_reg dst}, {emit_symbol s}\n`; + ` add {emit_reg dst}, {emit_reg dst}, #:lo12:{emit_symbol s}\n` + end else begin + ` adrp {emit_reg dst}, :got:{emit_symbol s}\n`; + ` ldr {emit_reg dst}, [{emit_reg dst}, #:got_lo12:{emit_symbol s}]\n` + end + +(* Output the assembly code for an instruction *) + +let emit_instr i = + emit_debug_info i.dbg; + match i.desc with + | Lend -> () + | Lop(Imove | Ispill | Ireload) -> + let src = i.arg.(0) and dst = i.res.(0) in + if src.loc <> dst.loc then begin + match (src, dst) with + | {loc = Reg _; typ = Float}, {loc = Reg _} -> + ` fmov {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _}, {loc = Reg _} -> + ` mov {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _}, {loc = Stack _} -> + ` str {emit_reg src}, {emit_stack dst}\n` + | {loc = Stack _}, {loc = Reg _} -> + ` ldr {emit_reg dst}, {emit_stack src}\n` + | _ -> + assert false + end + | Lop(Iconst_int n) -> + emit_intconst i.res.(0) n + | Lop(Iconst_float f) -> + let b = Int64.bits_of_float(float_of_string f) in + if b = 0L then + ` fmov {emit_reg i.res.(0)}, xzr /* {emit_string f} */\n` + else if is_immediate_float b then + ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b} /* {emit_string f} */\n` + else begin + let lbl = float_literal b in + ` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`; + ` ldr {emit_reg i.res.(0)}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}] /* {emit_string f} */\n` + end + | Lop(Iconst_symbol s) -> + emit_load_symbol_addr i.res.(0) s + | Lop(Icall_ind) -> + ` blr {emit_reg i.arg.(0)}\n`; + `{record_frame i.live i.dbg}\n` + | Lop(Icall_imm s) -> + ` bl {emit_symbol s}\n`; + `{record_frame i.live i.dbg}\n` + | Lop(Itailcall_ind) -> + output_epilogue (fun () -> ` br {emit_reg i.arg.(0)}\n`) + | Lop(Itailcall_imm s) -> + if s = !function_name then + ` b {emit_label !tailrec_entry_point}\n` + else + output_epilogue (fun () -> ` b {emit_symbol s}\n`) + | Lop(Iextcall(s, false)) -> + ` bl {emit_symbol s}\n` + | Lop(Iextcall(s, true)) -> + emit_load_symbol_addr reg_x15 s; + ` bl {emit_symbol "caml_c_call"}\n`; + `{record_frame i.live i.dbg}\n` + | Lop(Istackoffset n) -> + assert (n mod 16 = 0); + emit_stack_adjustment (-n); + stack_offset := !stack_offset + n + | Lop(Iload(size, addr)) -> + let dst = i.res.(0) in + let base = + match addr with + | Iindexed ofs -> i.arg.(0) + | Ibased(s, ofs) -> + ` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`; + reg_tmp1 in + begin match size with + | Byte_unsigned -> + ` ldrb {emit_wreg dst}, {emit_addressing addr base}\n` + | Byte_signed -> + ` ldrsb {emit_reg dst}, {emit_addressing addr base}\n` + | Sixteen_unsigned -> + ` ldrh {emit_wreg dst}, {emit_addressing addr base}\n` + | Sixteen_signed -> + ` ldrsh {emit_reg dst}, {emit_addressing addr base}\n` + | Thirtytwo_unsigned -> + ` ldr {emit_wreg dst}, {emit_addressing addr base}\n` + | Thirtytwo_signed -> + ` ldrsw {emit_reg dst}, {emit_addressing addr base}\n` + | Single -> + ` ldr s7, {emit_addressing addr base}\n`; + ` fcvt {emit_reg dst}, s7\n` + | Word | Double | Double_u -> + ` ldr {emit_reg dst}, {emit_addressing addr base}\n` + end + | Lop(Istore(size, addr)) -> + let src = i.arg.(0) in + let base = + match addr with + | Iindexed ofs -> i.arg.(1) + | Ibased(s, ofs) -> + ` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`; + reg_tmp1 in + begin match size with + | Byte_unsigned | Byte_signed -> + ` strb {emit_wreg src}, {emit_addressing addr base}\n` + | Sixteen_unsigned | Sixteen_signed -> + ` strh {emit_wreg src}, {emit_addressing addr base}\n` + | Thirtytwo_unsigned | Thirtytwo_signed -> + ` str {emit_wreg src}, {emit_addressing addr base}\n` + | Single -> + ` fcvt s7, {emit_reg src}\n`; + ` str s7, {emit_addressing addr base}\n`; + | Word | Double | Double_u -> + ` str {emit_reg src}, {emit_addressing addr base}\n` + end + | Lop(Ialloc n) -> + let lbl_frame = record_frame_label i.live i.dbg in + if !fastcode_flag then begin + let lbl_redo = new_label() in + let lbl_call_gc = new_label() in + `{emit_label lbl_redo}:`; + ` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`; + ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`; + ` add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`; + ` b.lo {emit_label lbl_call_gc}\n`; + call_gc_sites := + { gc_lbl = lbl_call_gc; + gc_return_lbl = lbl_redo; + gc_frame_lbl = lbl_frame } :: !call_gc_sites + end else begin + begin match n with + | 16 -> ` bl {emit_symbol "caml_alloc1"}\n` + | 24 -> ` bl {emit_symbol "caml_alloc2"}\n` + | 32 -> ` bl {emit_symbol "caml_alloc3"}\n` + | _ -> emit_intconst reg_x15 (Nativeint.of_int n); + ` bl {emit_symbol "caml_allocN"}\n` + end; + `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n` + end + | Lop(Iintop(Icomp cmp)) -> + ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n` + | Lop(Iintop_imm(Icomp cmp, n)) -> + ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; + ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n` + | Lop(Iintop Icheckbound) -> + let lbl = bound_error_label i.dbg in + ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` b.ls {emit_label lbl}\n` + | Lop(Iintop_imm(Icheckbound, n)) -> + let lbl = bound_error_label i.dbg in + ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; + ` b.ls {emit_label lbl}\n` + | Lop(Ispecific(Ishiftcheckbound shift)) -> + let lbl = bound_error_label i.dbg in + ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`; + ` b.cs {emit_label lbl}\n` + | Lop(Iintop Imod) -> + ` sdiv {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` msub {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *) + let l = Misc.log2 n in + ` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`; + ` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`; + ` asr {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_int l}\n` + | Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *) + let l = Misc.log2 n in + ` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`; + ` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`; + ` asr {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_int l}\n`; + ` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsl {emit_int l}\n` + | Lop(Iintop op) -> + let instr = name_for_int_operation op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Iintop_imm(op, n)) -> + let instr = name_for_int_operation op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n` + | Lop(Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf as op) -> + let instr = (match op with + | Ifloatofint -> "scvtf" + | Iintoffloat -> "fcvtzs" + | Iabsf -> "fabs" + | Inegf -> "fneg" + | Ispecific Isqrtf -> "fsqrt" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` + | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) -> + let instr = (match op with + | Iaddf -> "fadd" + | Isubf -> "fsub" + | Imulf -> "fmul" + | Idivf -> "fdiv" + | Ispecific Inegmulf -> "fnmul" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) -> + let instr = (match op with + | Imuladdf -> "fmadd" + | Inegmuladdf -> "fnmadd" + | Imulsubf -> "fmsub" + | Inegmulsubf -> "fnmsub" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n` + | Lop(Ispecific(Ishiftarith(op, shift))) -> + let instr = (match op with + Ishiftadd -> "add" + | Ishiftsub -> "sub") in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`; + if shift >= 0 + then `, lsl #{emit_int shift}\n` + else `, asr #{emit_int (-shift)}\n` + | Lop(Ispecific(Imuladd | Imulsub as op)) -> + let instr = (match op with + Imuladd -> "madd" + | Imulsub -> "msub" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` + | Lop(Ispecific(Ibswap size)) -> + begin match size with + | 16 -> + ` rev16 {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`; + ` ubfm {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #0, #16\n` + | 32 -> + ` rev {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n` + | 64 -> + ` rev {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` + | _ -> + assert false + end + | Lreloadretaddr -> + () + | Lreturn -> + output_epilogue (fun () -> ` ret\n`) + | Llabel lbl -> + `{emit_label lbl}:\n` + | Lbranch lbl -> + ` b {emit_label lbl}\n` + | Lcondbranch(tst, lbl) -> + begin match tst with + | Itruetest -> + ` cbnz {emit_reg i.arg.(0)}, {emit_label lbl}\n` + | Ifalsetest -> + ` cbz {emit_reg i.arg.(0)}, {emit_label lbl}\n` + | Iinttest cmp -> + ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + let comp = name_for_comparison cmp in + ` b.{emit_string comp} {emit_label lbl}\n` + | Iinttest_imm(cmp, n) -> + ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; + let comp = name_for_comparison cmp in + ` b.{emit_string comp} {emit_label lbl}\n` + | Ifloattest(cmp, neg) -> + let comp = (match (cmp, neg) with + | (Ceq, false) | (Cne, true) -> "eq" + | (Cne, false) | (Ceq, true) -> "ne" + | (Clt, false) -> "cc" + | (Clt, true) -> "cs" + | (Cle, false) -> "ls" + | (Cle, true) -> "hi" + | (Cgt, false) -> "gt" + | (Cgt, true) -> "le" + | (Cge, false) -> "ge" + | (Cge, true) -> "lt") in + ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` b.{emit_string comp} {emit_label lbl}\n` + | Ioddtest -> + ` tbnz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n` + | Ieventest -> + ` tbz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n` + end + | Lcondbranch3(lbl0, lbl1, lbl2) -> + ` cmp {emit_reg i.arg.(0)}, #1\n`; + begin match lbl0 with + None -> () + | Some lbl -> ` b.lt {emit_label lbl}\n` + end; + begin match lbl1 with + None -> () + | Some lbl -> ` b.eq {emit_label lbl}\n` + end; + begin match lbl2 with + None -> () + | Some lbl -> ` b.gt {emit_label lbl}\n` + end + | Lswitch jumptbl -> + let lbltbl = new_label() in + ` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`; + ` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2\n`; + ` br {emit_reg reg_tmp1}\n`; + `{emit_label lbltbl}:`; + for j = 0 to Array.length jumptbl - 1 do + ` b {emit_label jumptbl.(j)}\n` + done +(* Alternative: + let lbltbl = new_label() in + ` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`; + ` ldr {emit_wreg reg_tmp2}, [{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2]\n`; + ` add {emit_reg reg_tmp1}, {emit_wreg reg_tmp2}, sxtb\n`; + ` br {emit_reg reg_tmp1}\n`; + `{emit_label lbltbl}:\n`; + for j = 0 to Array.length jumptbl - 1 do + ` .word {emit_label jumptbl.(j)} - {emit_label lbltbl}\n` + done +*) + | Lsetuptrap lbl -> + let lblnext = new_label() in + ` adr {emit_reg reg_tmp1}, {emit_label lblnext}\n`; + ` b {emit_label lbl}\n`; + `{emit_label lblnext}:\n` + | Lpushtrap -> + stack_offset := !stack_offset + 16; + ` str {emit_reg reg_trap_ptr}, [sp, -16]!\n`; + ` str {emit_reg reg_tmp1}, [sp, #8]\n`; + cfi_adjust_cfa_offset 16; + ` mov {emit_reg reg_trap_ptr}, sp\n` + | Lpoptrap -> + ` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`; + cfi_adjust_cfa_offset (-16); + stack_offset := !stack_offset - 16 + | Lraise -> + if !Clflags.debug then begin + ` bl {emit_symbol "caml_raise_exn"}\n`; + `{record_frame Reg.Set.empty i.dbg}\n` + end else begin + ` mov sp, {emit_reg reg_trap_ptr}\n`; + ` ldr {emit_reg reg_tmp1}, [sp, #8]\n`; + ` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`; + ` br {emit_reg reg_tmp1}\n` + end + +(* Emission of an instruction sequence *) + +let rec emit_all i = + if i.desc = Lend then () else (emit_instr i; emit_all i.next) + +(* Emission of the profiling prelude *) + +let emit_profile() = () (* TODO *) +(* + match Config.system with + "linux_eabi" | "linux_eabihf" -> + ` push \{lr}\n`; + ` {emit_call "__gnu_mcount_nc"}\n` + | _ -> () +*) + +(* Emission of a function declaration *) + +let fundecl fundecl = + function_name := fundecl.fun_name; + fastcode_flag := fundecl.fun_fast; + tailrec_entry_point := new_label(); + float_literals := []; + stack_offset := 0; + call_gc_sites := []; + bound_error_sites := []; + ` .text\n`; + ` .align 2\n`; + ` .globl {emit_symbol fundecl.fun_name}\n`; + ` .type {emit_symbol fundecl.fun_name}, %function\n`; + `{emit_symbol fundecl.fun_name}:\n`; + emit_debug_info fundecl.fun_dbg; + cfi_startproc(); + if !Clflags.gprofile then emit_profile(); + let n = frame_size() in + if n > 0 then + emit_stack_adjustment (-n); + if !contains_calls then + ` str x30, [sp, #{emit_int (n-8)}]\n`; + `{emit_label !tailrec_entry_point}:\n`; + emit_all fundecl.fun_body; + List.iter emit_call_gc !call_gc_sites; + List.iter emit_call_bound_error !bound_error_sites; + cfi_endproc(); + ` .type {emit_symbol fundecl.fun_name}, %function\n`; + ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; + emit_literals() + +(* Emission of data *) + +let emit_item = function + | Cglobal_symbol s -> ` .globl {emit_symbol s}\n`; + | Cdefine_symbol s -> `{emit_symbol s}:\n` + | Cdefine_label lbl -> `{emit_data_label lbl}:\n` + | Cint8 n -> ` .byte {emit_int n}\n` + | Cint16 n -> ` .short {emit_int n}\n` + | Cint32 n -> ` .long {emit_nativeint n}\n` + | Cint n -> ` .quad {emit_nativeint n}\n` + | Csingle f -> emit_float32_directive ".long" f + | Cdouble f -> emit_float64_directive ".quad" f + | Csymbol_address s -> ` .quad {emit_symbol s}\n` + | Clabel_address lbl -> ` .quad {emit_data_label lbl}\n` + | Cstring s -> emit_string_directive " .ascii " s + | Cskip n -> if n > 0 then ` .space {emit_int n}\n` + | Calign n -> ` .align {emit_int(Misc.log2 n)}\n` + +let data l = + ` .data\n`; + List.iter emit_item l + +(* Beginning / end of an assembly file *) + +let begin_assembly() = + reset_debug_info(); + let lbl_begin = Compilenv.make_symbol (Some "data_begin") in + ` .data\n`; + ` .globl {emit_symbol lbl_begin}\n`; + `{emit_symbol lbl_begin}:\n`; + let lbl_begin = Compilenv.make_symbol (Some "code_begin") in + ` .text\n`; + ` .globl {emit_symbol lbl_begin}\n`; + `{emit_symbol lbl_begin}:\n` + +let end_assembly () = + let lbl_end = Compilenv.make_symbol (Some "code_end") in + ` .text\n`; + ` .globl {emit_symbol lbl_end}\n`; + `{emit_symbol lbl_end}:\n`; + let lbl_end = Compilenv.make_symbol (Some "data_end") in + ` .data\n`; + ` .globl {emit_symbol lbl_end}\n`; + `{emit_symbol lbl_end}:\n`; + ` .long 0\n`; + let lbl = Compilenv.make_symbol (Some "frametable") in + ` .globl {emit_symbol lbl}\n`; + `{emit_symbol lbl}:\n`; + emit_frames + { efa_label = (fun lbl -> + ` .type {emit_label lbl}, %function\n`; + ` .quad {emit_label lbl}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` .quad {emit_int n}\n`); + efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`); + efa_label_rel = (fun lbl ofs -> + ` .long {emit_label lbl} - . + {emit_int32 ofs}\n`); + efa_def_label = (fun lbl -> `{emit_label lbl}:\n`); + efa_string = (fun s -> emit_string_directive " .asciz " s) }; + ` .type {emit_symbol lbl}, %object\n`; + ` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`; + begin match Config.system with + | "linux" -> + (* Mark stack as non-executable *) + ` .section .note.GNU-stack,\"\",%progbits\n` + | _ -> () + end diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml new file mode 100644 index 0000000..b52c2fd --- /dev/null +++ b/asmcomp/arm64/proc.ml @@ -0,0 +1,212 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* Benedikt Meurer, University of Siegen *) +(* *) +(* Copyright 2013 Institut National de Recherche en Informatique *) +(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *) +(* reserved. This file is distributed under the terms of the Q *) +(* Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Description of the ARM processor in 64-bit mode *) + +open Misc +open Cmm +open Reg +open Arch +open Mach + +(* Instruction selection *) + +let word_addressed = false + +(* Registers available for register allocation *) + +(* Integer register map: + x0 - x15 general purpose (caller-save) + x16, x17 temporaries (used by call veeners) + x18 platform register (reserved) + x19 - x25 general purpose (callee-save) + x26 trap pointer + x27 alloc pointer + x28 alloc limit + x29 frame pointer + x30 return address + sp / xzr stack pointer / zero register + Floating-point register map: + d0 - d7 general purpose (caller-save) + d8 - d15 general purpose (callee-save) + d16 - d31 generat purpose (caller-save) +*) + +let int_reg_name = + [| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7"; + "x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15"; + "x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25"; + "x26"; "x27"; "x28"; "x16"; "x17" |] + +let float_reg_name = + [| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7"; + "d8"; "d9"; "d10"; "d11"; "d12"; "d13"; "d14"; "d15"; + "d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23"; + "d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |] + +let num_register_classes = 2 + +let register_class r = + match r.typ with + | (Int | Addr) -> 0 + | Float -> 1 + +let num_available_registers = + [| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *) + +let first_available_register = + [| 0; 100 |] + +let register_name r = + if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) + +let rotate_registers = true + +(* Representation of hard registers by pseudo-registers *) + +let hard_int_reg = + let v = Array.create 28 Reg.dummy in + for i = 0 to 27 do + v.(i) <- Reg.at_location Int (Reg i) + done; + v + +let hard_float_reg = + let v = Array.create 32 Reg.dummy in + for i = 0 to 31 do + v.(i) <- Reg.at_location Float (Reg(100 + i)) + done; + v + +let all_phys_regs = + Array.append hard_int_reg hard_float_reg + +let phys_reg n = + if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) + +let reg_x15 = phys_reg 15 +let reg_d7 = phys_reg 107 + +let stack_slot slot ty = + Reg.at_location ty (Stack slot) + +(* Calling conventions *) + +let calling_conventions + first_int last_int first_float last_float make_stack arg = + let loc = Array.create (Array.length arg) Reg.dummy in + let int = ref first_int in + let float = ref first_float in + let ofs = ref 0 in + for i = 0 to Array.length arg - 1 do + match arg.(i).typ with + Int | Addr as ty -> + if !int <= last_int then begin + loc.(i) <- phys_reg !int; + incr int + end else begin + loc.(i) <- stack_slot (make_stack !ofs) ty; + ofs := !ofs + size_int + end + | Float -> + if !float <= last_float then begin + loc.(i) <- phys_reg !float; + incr float + end else begin + loc.(i) <- stack_slot (make_stack !ofs) Float; + ofs := !ofs + size_float + end + done; + (loc, Misc.align !ofs 16) (* keep stack 16-aligned *) + +let incoming ofs = Incoming ofs +let outgoing ofs = Outgoing ofs +let not_supported ofs = fatal_error "Proc.loc_results: cannot call" + +(* OCaml calling convention: + first integer args in r0...r15 + first float args in d0...d15 + remaining args on stack. + Return values in r0...r15 or d0...d15. *) + +let loc_arguments arg = + calling_conventions 0 15 100 115 outgoing arg +let loc_parameters arg = + let (loc, _) = calling_conventions 0 15 100 115 incoming arg in loc +let loc_results res = + let (loc, _) = calling_conventions 0 15 100 115 not_supported res in loc + +(* C calling convention: + first integer args in r0...r7 + first float args in d0...d7 + remaining args on stack. + Return values in r0...r1 or d0. *) + +let loc_external_arguments arg = + calling_conventions 0 7 100 107 outgoing arg +let loc_external_results res = + let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc + +let loc_exn_bucket = phys_reg 0 + +(* Registers destroyed by operations *) + +let destroyed_at_c_call = + (* x19-x28, d8-d15 preserved *) + Array.of_list (List.map phys_reg + [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15; + 100;101;102;103;104;105;106;107; + 116;117;118;119;120;121;122;123; + 124;125;126;127;128;129;130;131]) + +let destroyed_at_oper = function + | Iop(Icall_ind | Icall_imm _) | Iop(Iextcall(_, true)) -> + all_phys_regs + | Iop(Iextcall(_, false)) -> + destroyed_at_c_call + | Iop(Ialloc _) -> + [| reg_x15 |] + | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) -> + [| reg_d7 |] (* d7 / s7 destroyed *) + | _ -> [||] + +let destroyed_at_raise = all_phys_regs + +(* Maximal register pressure *) + +let safe_register_pressure = function + | Iextcall(_, _) -> 8 + | Ialloc _ -> 25 + | _ -> 26 + +let max_register_pressure = function + | Iextcall(_, _) -> [| 10; 8 |] + | Ialloc _ -> [| 25; 32 |] + | Iintoffloat | Ifloatofint + | Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |] + | _ -> [| 26; 32 |] + +(* Layout of the stack *) + +let num_stack_slots = [| 0; 0 |] +let contains_calls = ref false + +(* Calling the assembler *) + +let assemble_file infile outfile = + Ccomp.command (Config.asm ^ " -o " ^ + Filename.quote outfile ^ " " ^ Filename.quote infile) + + +let init () = () diff --git a/asmcomp/arm64/reload.ml b/asmcomp/arm64/reload.ml new file mode 100644 index 0000000..ff9214e --- /dev/null +++ b/asmcomp/arm64/reload.ml @@ -0,0 +1,16 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2013 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Reloading for the ARM 64 bits *) + +let fundecl f = + (new Reloadgen.reload_generic)#fundecl f diff --git a/asmcomp/arm64/scheduling.ml b/asmcomp/arm64/scheduling.ml new file mode 100644 index 0000000..cc244be --- /dev/null +++ b/asmcomp/arm64/scheduling.ml @@ -0,0 +1,18 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2013 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +let _ = let module M = Schedgen in () (* to create a dependency *) + +(* Scheduling is turned off because the processor schedules dynamically + much better than what we could do. *) + +let fundecl f = f diff --git a/asmcomp/arm64/selection.ml b/asmcomp/arm64/selection.ml new file mode 100644 index 0000000..c74b282 --- /dev/null +++ b/asmcomp/arm64/selection.ml @@ -0,0 +1,265 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* Benedikt Meurer, University of Siegen *) +(* *) +(* Copyright 2013 Institut National de Recherche en Informatique *) +(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *) +(* reserved. This file is distributed under the terms of the Q *) +(* Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Instruction selection for the ARM processor *) + +open Arch +open Cmm +open Mach + +let is_offset chunk n = + (n >= -256 && n <= 255) (* 9 bits signed unscaled *) +|| (n >= 0 && + match chunk with (* 12 bits unsigned, scaled by chunk size *) + | Byte_unsigned | Byte_signed -> + n < 0x1000 + | Sixteen_unsigned | Sixteen_signed -> + n land 1 = 0 && n lsr 1 < 0x1000 + | Thirtytwo_unsigned | Thirtytwo_signed | Single -> + n land 3 = 0 && n lsr 2 < 0x1000 + | Word | Double | Double_u -> + n land 7 = 0 && n lsr 3 < 0x1000) + +(* An automaton to recognize ( 0+1+0* | 1+0+1* ) + + 0 1 0 + / \ / \ / \ + \ / \ / \ / + -0--> [1] --1--> [2] --0--> [3] + / + [0] + \ + -1--> [4] --0--> [5] --1--> [6] + / \ / \ / \ + \ / \ / \ / + 1 0 1 + +The accepting states are 2, 3, 5 and 6. *) + +let auto_table = [| (* accepting?, next on 0, next on 1 *) + (* state 0 *) (false, 1, 4); + (* state 1 *) (false, 1, 2); + (* state 2 *) (true, 3, 2); + (* state 3 *) (true, 3, 7); + (* state 4 *) (false, 5, 4); + (* state 5 *) (true, 5, 6); + (* state 6 *) (true, 7, 6); + (* state 7 *) (false, 7, 7) (* error state *) +|] + +let rec run_automata nbits state input = + let (acc, next0, next1) = auto_table.(state) in + if nbits <= 0 + then acc + else run_automata (nbits - 1) + (if input land 1 = 0 then next0 else next1) + (input asr 1) + +(* We are very conservative wrt what ARM64 supports: we don't support + repetitions of a 000111000 or 1110000111 pattern, just a single + pattern of this kind. *) + +let is_logical_immediate n = + n <> 0 && n <> -1 && run_automata 64 0 n + +let is_intconst = function + Cconst_int _ -> true + | _ -> false + +let inline_ops = + [ "sqrt"; "caml_bswap16_direct"; "caml_int32_direct_bswap"; + "caml_int64_direct_bswap"; "caml_nativeint_direct_bswap" ] + +let use_direct_addressing symb = + (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit symb + +(* Instruction selection *) + +class selector = object(self) + +inherit Selectgen.selector_generic as super + +method is_immediate n = + let mn = -n in + n land 0xFFF = n || n land 0xFFF_000 = n + || mn land 0xFFF = mn || mn land 0xFFF_000 = mn + +method! is_simple_expr = function + (* inlined floating-point ops are simple if their arguments are *) + | Cop(Cextcall(fn, _, _, _), args) when List.mem fn inline_ops -> + List.for_all self#is_simple_expr args + | e -> super#is_simple_expr e + +method select_addressing chunk = function + | Cop(Cadda, [Cconst_symbol s; Cconst_int n]) + when use_direct_addressing s -> + (Ibased(s, n), Ctuple []) + | Cop(Cadda, [arg; Cconst_int n]) + when is_offset chunk n -> + (Iindexed n, arg) + | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])]) + when is_offset chunk n -> + (Iindexed n, Cop(Cadda, [arg1; arg2])) + | Cconst_symbol s + when use_direct_addressing s -> + (Ibased(s, 0), Ctuple []) + | arg -> + (Iindexed 0, arg) + +method! select_operation op args = + match op with + (* Integer addition *) + | Caddi | Cadda -> + begin match args with + (* Add immediate *) + | [arg; Cconst_int n] | [Cconst_int n; arg] when self#is_immediate n -> + ((if n >= 0 then Iintop_imm(Iadd, n) else Iintop_imm(Isub, -n)), + [arg]) + (* Shift-add *) + | [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 -> + (Ispecific(Ishiftarith(Ishiftadd, n)), [arg1; arg2]) + | [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 -> + (Ispecific(Ishiftarith(Ishiftadd, -n)), [arg1; arg2]) + | [Cop(Clsl, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 -> + (Ispecific(Ishiftarith(Ishiftadd, n)), [arg2; arg1]) + | [Cop(Casr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 -> + (Ispecific(Ishiftarith(Ishiftadd, -n)), [arg2; arg1]) + (* Multiply-add *) + | [arg1; Cop(Cmuli, args2)] | [Cop(Cmuli, args2); arg1] -> + begin match self#select_operation Cmuli args2 with + | (Iintop_imm(Ilsl, l), [arg3]) -> + (Ispecific(Ishiftarith(Ishiftadd, l)), [arg1; arg3]) + | (Iintop Imul, [arg3; arg4]) -> + (Ispecific Imuladd, [arg3; arg4; arg1]) + | _ -> + super#select_operation op args + end + | _ -> + super#select_operation op args + end + (* Integer subtraction *) + | Csubi | Csuba -> + begin match args with + (* Sub immediate *) + | [arg; Cconst_int n] when self#is_immediate n -> + ((if n >= 0 then Iintop_imm(Isub, n) else Iintop_imm(Iadd, -n)), + [arg]) + (* Shift-sub *) + | [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 -> + (Ispecific(Ishiftarith(Ishiftsub, n)), [arg1; arg2]) + | [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 -> + (Ispecific(Ishiftarith(Ishiftsub, -n)), [arg1; arg2]) + (* Multiply-sub *) + | [arg1; Cop(Cmuli, args2)] -> + begin match self#select_operation Cmuli args2 with + | (Iintop_imm(Ilsl, l), [arg3]) -> + (Ispecific(Ishiftarith(Ishiftsub, l)), [arg1; arg3]) + | (Iintop Imul, [arg3; arg4]) -> + (Ispecific Imulsub, [arg3; arg4; arg1]) + | _ -> + super#select_operation op args + end + | _ -> + super#select_operation op args + end + (* Checkbounds *) + | Ccheckbound _ -> + begin match args with + | [Cop(Clsr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 -> + (Ispecific(Ishiftcheckbound n), [arg1; arg2]) + | _ -> + super#select_operation op args + end + (* Integer multiplication *) + (* ARM does not support immediate operands for multiplication *) + | Cmuli -> + begin match args with + | [arg; Cconst_int n] | [Cconst_int n; arg] -> + let l = Misc.log2 n in + if n = 1 lsl l + then (Iintop_imm(Ilsl, l), [arg]) + else (Iintop Imul, args) + | _ -> + (Iintop Imul, args) + end + (* Division and modulus *) + (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *) + | Cdivi -> + begin match args with + | [arg; Cconst_int n] when n = 1 lsl Misc.log2 n -> + ((if n = 1 then Imove else Iintop_imm(Idiv, n)), [arg]) + | _ -> + (Iintop Idiv, args) + end + | Cmodi -> + begin match args with + | [arg; Cconst_int n] when n = 1 lsl Misc.log2 n -> + ((if n = 1 then Iconst_int 0n else Iintop_imm(Imod, n)), [arg]) + | _ -> + (Iintop Imod, args) + end + (* Bitwise logical operations have a different range of immediate + operands than the other instructions *) + | Cand -> self#select_logical Iand args + | Cor -> self#select_logical Ior args + | Cxor -> self#select_logical Ixor args + (* Recognize floating-point negate and multiply *) + | Cnegf -> + begin match args with + | [Cop(Cmulf, args)] -> (Ispecific Inegmulf, args) + | _ -> super#select_operation op args + end + (* Recognize floating-point multiply and add/sub *) + | Caddf -> + begin match args with + | [arg; Cop(Cmulf, args)] | [Cop(Cmulf, args); arg] -> + (Ispecific Imuladdf, arg :: args) + | _ -> + super#select_operation op args + end + | Csubf -> + begin match args with + | [arg; Cop(Cmulf, args)] -> + (Ispecific Imulsubf, arg :: args) + | [Cop(Cmulf, args); arg] -> + (Ispecific Inegmulsubf, arg :: args) + | _ -> + super#select_operation op args + end + (* Recognize floating-point square root *) + | Cextcall("sqrt", _, _, _) -> + (Ispecific Isqrtf, args) + (* Recognize bswap instructions *) + | Cextcall("caml_bswap16_direct", _, _, _) -> + (Ispecific(Ibswap 16), args) + | Cextcall("caml_int32_direct_bswap", _, _, _) -> + (Ispecific(Ibswap 32), args) + | Cextcall(("caml_int64_direct_bswap"|"caml_nativeint_direct_bswap"), + _, _, _) -> + (Ispecific (Ibswap 64), args) + (* Other operations are regular *) + | _ -> + super#select_operation op args + +method select_logical op = function + | [arg; Cconst_int n] when is_logical_immediate n -> + (Iintop_imm(op, n), [arg]) + | [Cconst_int n; arg] when is_logical_immediate n -> + (Iintop_imm(op, n), [arg]) + | args -> + (Iintop op, args) + +end + +let fundecl f = (new selector)#emit_fundecl f diff --git a/asmcomp/compilenv.ml b/asmcomp/compilenv.ml index 17870c9..280b131 100644 --- a/asmcomp/compilenv.ml +++ b/asmcomp/compilenv.ml @@ -83,6 +83,15 @@ let make_symbol ?(unitname = current_unit.ui_symbol) idopt = | None -> prefix | Some id -> prefix ^ "__" ^ id +let symbol_in_current_unit name = + let prefix = "caml" ^ current_unit.ui_symbol in + name = prefix || + (let lp = String.length prefix in + String.length name >= 2 + lp + && String.sub name 0 lp = prefix + && name.[lp] = '_' + && name.[lp + 1] = '_') + let read_unit_info filename = let ic = open_in_bin filename in try diff --git a/asmcomp/compilenv.mli b/asmcomp/compilenv.mli index 51cb8c6..9ffb145 100644 --- a/asmcomp/compilenv.mli +++ b/asmcomp/compilenv.mli @@ -31,6 +31,10 @@ val make_symbol: ?unitname:string -> string option -> string corresponds to symbol [id] in the compilation unit [u] (or the current unit). *) +val symbol_in_current_unit: string -> bool + (* Return true if the given asm symbol belongs to the + current compilation unit, false otherwise. *) + val symbol_for_global: Ident.t -> string (* Return the asm symbol that refers to the given global identifier *) diff --git a/asmrun/arm64.S b/asmrun/arm64.S new file mode 100644 index 0000000..de670e6 --- /dev/null +++ b/asmrun/arm64.S @@ -0,0 +1,535 @@ +/***********************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */ +/* */ +/* Copyright 2013 Institut National de Recherche en Informatique et */ +/* en Automatique. All rights reserved. This file is distributed */ +/* under the terms of the GNU Library General Public License, with */ +/* the special exception on linking described in file ../LICENSE. */ +/* */ +/***********************************************************************/ + +/* Asm part of the runtime system, ARM processor, 64-bit mode */ +/* Must be preprocessed by cpp */ + +/* Special registers */ + +#define TRAP_PTR x26 +#define ALLOC_PTR x27 +#define ALLOC_LIMIT x28 +#define ARG x15 +#define TMP x16 +#define TMP2 x17 + +/* Support for CFI directives */ + +#if defined(ASM_CFI_SUPPORTED) +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_ADJUST(n) .cfi_adjust_cfa_offset n +#else +#define CFI_STARTPROC +#define CFI_ENDPROC +#define CFI_ADJUST(n) +#endif + +/* Support for profiling with gprof */ + +#define PROFILE + +/* Macros to load and store global variables. Destroy TMP2 */ + +#if defined(__PIC__) + +#define ADDRGLOBAL(reg,symb) \ + adrp TMP2, :got:symb; \ + ldr reg, [TMP2, #:got_lo12:symb] + +#define LOADGLOBAL(reg,symb) \ + ADDRGLOBAL(TMP2,symb); \ + ldr reg, [TMP2] + +#define STOREGLOBAL(reg,symb) \ + ADDRGLOBAL(TMP2,symb); \ + str reg, [TMP2] + +#else + +#define ADDRGLOBAL(reg,symb) \ + adrp reg, symb; \ + add reg, reg, #:lo12:symb + +#define LOADGLOBAL(reg,symb) \ + adrp TMP2, symb; \ + ldr reg, [TMP2, #:lo12:symb] + +#define STOREGLOBAL(reg,symb) \ + adrp TMP2, symb; \ + str reg, [TMP2, #:lo12:symb] + +#endif + +/* Allocation functions and GC interface */ + + .globl caml_system__code_begin +caml_system__code_begin: + + .align 2 + .globl caml_call_gc +caml_call_gc: + CFI_STARTPROC + PROFILE + /* Record return address */ + STOREGLOBAL(x30, caml_last_return_address) +.Lcaml_call_gc: + /* Record lowest stack address */ + mov TMP, sp + STOREGLOBAL(TMP, caml_bottom_of_stack) + /* Set up stack space, saving return address and frame pointer */ + /* (2 regs RA/GP, 24 allocatable int regs, 24 caller-save float regs) * 8 */ + stp x29, x30, [sp, -400]! + CFI_ADJUST(400) + add x29, sp, #0 + /* Save allocatable integer registers on the stack, in the order + given in proc.ml */ + stp x0, x1, [sp, 16] + stp x2, x3, [sp, 32] + stp x4, x5, [sp, 48] + stp x6, x7, [sp, 64] + stp x8, x9, [sp, 80] + stp x10, x11, [sp, 96] + stp x12, x13, [sp, 112] + stp x14, x15, [sp, 128] + stp x19, x20, [sp, 144] + stp x21, x22, [sp, 160] + stp x23, x24, [sp, 176] + str x25, [sp, 192] + /* Save caller-save floating-point registers on the stack + (callee-saves are preserved by caml_garbage_collection) */ + stp d0, d1, [sp, 208] + stp d2, d3, [sp, 224] + stp d4, d5, [sp, 240] + stp d6, d7, [sp, 256] + stp d16, d17, [sp, 272] + stp d18, d19, [sp, 288] + stp d20, d21, [sp, 304] + stp d22, d23, [sp, 320] + stp d24, d25, [sp, 336] + stp d26, d27, [sp, 352] + stp d28, d29, [sp, 368] + stp d30, d31, [sp, 384] + /* Store pointer to saved integer registers in caml_gc_regs */ + add TMP, sp, #16 + STOREGLOBAL(TMP, caml_gc_regs) + /* Save current allocation pointer for debugging purposes */ + STOREGLOBAL(ALLOC_PTR, caml_young_ptr) + /* Save trap pointer in case an exception is raised during GC */ + STOREGLOBAL(TRAP_PTR, caml_exception_pointer) + /* Call the garbage collector */ + bl caml_garbage_collection + /* Restore registers */ + ldp x0, x1, [sp, 16] + ldp x2, x3, [sp, 32] + ldp x4, x5, [sp, 48] + ldp x6, x7, [sp, 64] + ldp x8, x9, [sp, 80] + ldp x10, x11, [sp, 96] + ldp x12, x13, [sp, 112] + ldp x14, x15, [sp, 128] + ldp x19, x20, [sp, 144] + ldp x21, x22, [sp, 160] + ldp x23, x24, [sp, 176] + ldr x25, [sp, 192] + ldp d0, d1, [sp, 208] + ldp d2, d3, [sp, 224] + ldp d4, d5, [sp, 240] + ldp d6, d7, [sp, 256] + ldp d16, d17, [sp, 272] + ldp d18, d19, [sp, 288] + ldp d20, d21, [sp, 304] + ldp d22, d23, [sp, 320] + ldp d24, d25, [sp, 336] + ldp d26, d27, [sp, 352] + ldp d28, d29, [sp, 368] + ldp d30, d31, [sp, 384] + /* Reload new allocation pointer and allocation limit */ + LOADGLOBAL(ALLOC_PTR, caml_young_ptr) + LOADGLOBAL(ALLOC_LIMIT, caml_young_limit) + /* Free stack space and return to caller */ + ldp x29, x30, [sp], 400 + ret + CFI_ENDPROC + .type caml_call_gc, %function + .size caml_call_gc, .-caml_call_gc + + .align 2 + .globl caml_alloc1 +caml_alloc1: + CFI_STARTPROC + PROFILE +1: sub ALLOC_PTR, ALLOC_PTR, #16 + cmp ALLOC_PTR, ALLOC_LIMIT + b.lo 2f + ret +2: stp x29, x30, [sp, -16]! + CFI_ADJUST(16) + add x29, sp, #0 + /* Record return address */ + STOREGLOBAL(x30, caml_last_return_address) + /* Call GC */ + bl .Lcaml_call_gc + /* Restore return address */ + ldp x29, x30, [sp], 16 + CFI_ADJUST(-16) + /* Try again */ + b 1b + CFI_ENDPROC + .type caml_alloc1, %function + .size caml_alloc1, .-caml_alloc1 + + .align 2 + .globl caml_alloc2 +caml_alloc2: + CFI_STARTPROC + PROFILE +1: sub ALLOC_PTR, ALLOC_PTR, #24 + cmp ALLOC_PTR, ALLOC_LIMIT + b.lo 2f + ret +2: stp x29, x30, [sp, -16]! + CFI_ADJUST(16) + add x29, sp, #0 + /* Record return address */ + STOREGLOBAL(x30, caml_last_return_address) + /* Call GC */ + bl .Lcaml_call_gc + /* Restore return address */ + ldp x29, x30, [sp], 16 + CFI_ADJUST(-16) + /* Try again */ + b 1b + CFI_ENDPROC + .type caml_alloc2, %function + .size caml_alloc2, .-caml_alloc2 + + .align 2 + .globl caml_alloc3 +caml_alloc3: + CFI_STARTPROC + PROFILE +1: sub ALLOC_PTR, ALLOC_PTR, #32 + cmp ALLOC_PTR, ALLOC_LIMIT + b.lo 2f + ret +2: stp x29, x30, [sp, -16]! + CFI_ADJUST(16) + add x29, sp, #0 + /* Record return address */ + STOREGLOBAL(x30, caml_last_return_address) + /* Call GC */ + bl .Lcaml_call_gc + /* Restore return address */ + ldp x29, x30, [sp], 16 + CFI_ADJUST(-16) + /* Try again */ + b 1b + CFI_ENDPROC + .type caml_alloc2, %function + .size caml_alloc2, .-caml_alloc2 + + .align 2 + .globl caml_allocN +caml_allocN: + CFI_STARTPROC + PROFILE +1: sub ALLOC_PTR, ALLOC_PTR, ARG + cmp ALLOC_PTR, ALLOC_LIMIT + b.lo 2f + ret +2: stp x29, x30, [sp, -16]! + CFI_ADJUST(16) + add x29, sp, #0 + /* Record return address */ + STOREGLOBAL(x30, caml_last_return_address) + /* Call GC. This preserves ARG */ + bl .Lcaml_call_gc + /* Restore return address */ + ldp x29, x30, [sp], 16 + CFI_ADJUST(-16) + /* Try again */ + b 1b + CFI_ENDPROC + .type caml_allocN, %function + .size caml_allocN, .-caml_allocN + +/* Call a C function from OCaml */ +/* Function to call is in ARG */ + + .align 2 + .globl caml_c_call +caml_c_call: + CFI_STARTPROC + PROFILE + /* Preserve return address in callee-save register x19 */ + mov x19, x30 + /* Record lowest stack address and return address */ + STOREGLOBAL(x30, caml_last_return_address) + add TMP, sp, #0 + STOREGLOBAL(TMP, caml_bottom_of_stack) + /* Make the exception handler alloc ptr available to the C code */ + STOREGLOBAL(ALLOC_PTR, caml_young_ptr) + STOREGLOBAL(TRAP_PTR, caml_exception_pointer) + /* Call the function */ + blr ARG + /* Reload alloc ptr and alloc limit */ + LOADGLOBAL(ALLOC_PTR, caml_young_ptr) + LOADGLOBAL(ALLOC_LIMIT, caml_young_limit) + /* Return */ + ret x19 + CFI_ENDPROC + .type caml_c_call, %function + .size caml_c_call, .-caml_c_call + +/* Start the OCaml program */ + + .align 2 + .globl caml_start_program +caml_start_program: + CFI_STARTPROC + PROFILE + ADDRGLOBAL(ARG, caml_program) + +/* Code shared with caml_callback* */ +/* Address of OCaml code to call is in ARG */ +/* Arguments to the OCaml code are in x0...x7 */ + +.Ljump_to_caml: + /* Set up stack frame and save callee-save registers */ + stp x29, x30, [sp, -160]! + CFI_ADJUST(160) + add x29, sp, #0 + stp x19, x20, [sp, 16] + stp x21, x22, [sp, 32] + stp x23, x24, [sp, 48] + stp x25, x26, [sp, 64] + stp x27, x28, [sp, 80] + stp d8, d9, [sp, 96] + stp d10, d11, [sp, 112] + stp d12, d13, [sp, 128] + stp d14, d15, [sp, 144] + /* Setup a callback link on the stack */ + LOADGLOBAL(x8, caml_bottom_of_stack) + LOADGLOBAL(x9, caml_last_return_address) + LOADGLOBAL(x10, caml_gc_regs) + stp x8, x9, [sp, -32]! /* 16-byte alignment */ + CFI_ADJUST(32) + str x10, [sp, 16] + /* Setup a trap frame to catch exceptions escaping the OCaml code */ + LOADGLOBAL(x8, caml_exception_pointer) + adr x9, .Ltrap_handler + stp x8, x9, [sp, -16]! + CFI_ADJUST(16) + add TRAP_PTR, sp, #0 + /* Reload allocation pointers */ + LOADGLOBAL(ALLOC_PTR, caml_young_ptr) + LOADGLOBAL(ALLOC_LIMIT, caml_young_limit) + /* Call the OCaml code */ + blr ARG +.Lcaml_retaddr: + /* Pop the trap frame, restoring caml_exception_pointer */ + ldr x8, [sp], 16 + CFI_ADJUST(-16) + STOREGLOBAL(x8, caml_exception_pointer) + /* Pop the callback link, restoring the global variables */ +.Lreturn_result: + ldr x10, [sp, 16] + ldp x8, x9, [sp], 32 + CFI_ADJUST(-32) + STOREGLOBAL(x8, caml_bottom_of_stack) + STOREGLOBAL(x9, caml_last_return_address) + STOREGLOBAL(x10, caml_gc_regs) + /* Update allocation pointer */ + STOREGLOBAL(ALLOC_PTR, caml_young_ptr) + /* Reload callee-save registers and return address */ + ldp x19, x20, [sp, 16] + ldp x21, x22, [sp, 32] + ldp x23, x24, [sp, 48] + ldp x25, x26, [sp, 64] + ldp x27, x28, [sp, 80] + ldp d8, d9, [sp, 96] + ldp d10, d11, [sp, 112] + ldp d12, d13, [sp, 128] + ldp d14, d15, [sp, 144] + ldp x29, x30, [sp], 160 + CFI_ADJUST(-160) + /* Return to C caller */ + ret + CFI_ENDPROC + .type .Lcaml_retaddr, %function + .size .Lcaml_retaddr, .-.Lcaml_retaddr + .type caml_start_program, %function + .size caml_start_program, .-caml_start_program + +/* The trap handler */ + + .align 2 +.Ltrap_handler: + CFI_STARTPROC + /* Save exception pointer */ + STOREGLOBAL(TRAP_PTR, caml_exception_pointer) + /* Encode exception bucket as an exception result */ + orr x0, x0, #2 + /* Return it */ + b .Lreturn_result + CFI_ENDPROC + .type .Ltrap_handler, %function + .size .Ltrap_handler, .-.Ltrap_handler + +/* Raise an exception from OCaml */ + + .align 2 + .globl caml_raise_exn +caml_raise_exn: + CFI_STARTPROC + PROFILE + /* Test if backtrace is active */ + LOADGLOBAL(TMP, caml_backtrace_active) + cbnz TMP, 2f +1: /* Cut stack at current trap handler */ + mov sp, TRAP_PTR + /* Pop previous handler and jump to it */ + ldr TMP, [sp, 8] + ldr TRAP_PTR, [sp], 16 + br TMP +2: /* Preserve exception bucket in callee-save register x19 */ + mov x19, x0 + /* Stash the backtrace */ + /* arg1: exn bucket, already in x0 */ + mov x1, x30 /* arg2: pc of raise */ + add x2, sp, #0 /* arg3: sp of raise */ + mov x3, TRAP_PTR /* arg4: sp of handler */ + bl caml_stash_backtrace + /* Restore exception bucket and raise */ + mov x0, x19 + b 1b + CFI_ENDPROC + .type caml_raise_exn, %function + .size caml_raise_exn, .-caml_raise_exn + +/* Raise an exception from C */ + + .align 2 + .globl caml_raise_exception +caml_raise_exception: + CFI_STARTPROC + PROFILE + /* Reload trap ptr, alloc ptr and alloc limit */ + LOADGLOBAL(TRAP_PTR, caml_exception_pointer) + LOADGLOBAL(ALLOC_PTR, caml_young_ptr) + LOADGLOBAL(ALLOC_LIMIT, caml_young_limit) + /* Test if backtrace is active */ + LOADGLOBAL(TMP, caml_backtrace_active) + cbnz TMP, 2f +1: /* Cut stack at current trap handler */ + mov sp, TRAP_PTR + /* Pop previous handler and jump to it */ + ldr TMP, [sp, 8] + ldr TRAP_PTR, [sp], 16 + br TMP +2: /* Preserve exception bucket in callee-save register x19 */ + mov x19, x0 + /* Stash the backtrace */ + /* arg1: exn bucket, already in x0 */ + LOADGLOBAL(x1, caml_last_return_address) /* arg2: pc of raise */ + LOADGLOBAL(x2, caml_bottom_of_stack) /* arg3: sp of raise */ + mov x3, TRAP_PTR /* arg4: sp of handler */ + bl caml_stash_backtrace + /* Restore exception bucket and raise */ + mov x0, x19 + b 1b + CFI_ENDPROC + .type caml_raise_exception, %function + .size caml_raise_exception, .-caml_raise_exception + +/* Callback from C to OCaml */ + + .align 2 + .globl caml_callback_exn +caml_callback_exn: + CFI_STARTPROC + PROFILE + /* Initial shuffling of arguments (x0 = closure, x1 = first arg) */ + mov TMP, x0 + mov x0, x1 /* x0 = first arg */ + mov x1, TMP /* x1 = closure environment */ + ldr ARG, [TMP] /* code pointer */ + b .Ljump_to_caml + CFI_ENDPROC + .type caml_callback_exn, %function + .size caml_callback_exn, .-caml_callback_exn + + .align 2 + .globl caml_callback2_exn +caml_callback2_exn: + CFI_STARTPROC + PROFILE + /* Initial shuffling of arguments (x0 = closure, x1 = arg1, x2 = arg2) */ + mov TMP, x0 + mov x0, x1 /* x0 = first arg */ + mov x1, x2 /* x1 = second arg + mov x2, TMP /* x2 = closure environment */ + ADDRGLOBAL(ARG, caml_apply2) + b .Ljump_to_caml + CFI_ENDPROC + .type caml_callback2_exn, %function + .size caml_callback2_exn, .-caml_callback2_exn + + .align 2 + .globl caml_callback3_exn +caml_callback3_exn: + CFI_STARTPROC + PROFILE + /* Initial shuffling of arguments */ + /* (x0 = closure, x1 = arg1, x2 = arg2, x3 = arg3) */ + mov TMP, x0 + mov x0, x1 /* x0 = first arg */ + mov x1, x2 /* x1 = second arg */ + mov x2, x3 /* x2 = third arg */ + mov x3, TMP /* x3 = closure environment */ + ADDRGLOBAL(ARG, caml_apply3) + b .Ljump_to_caml + CFI_ENDPROC + .type caml_callback3_exn, %function + .size caml_callback3_exn, .-caml_callback3_exn + + .align 2 + .globl caml_ml_array_bound_error +caml_ml_array_bound_error: + CFI_STARTPROC + PROFILE + /* Load address of [caml_array_bound_error] in ARG */ + ADDRGLOBAL(ARG, caml_array_bound_error) + /* Call that function */ + b caml_c_call + CFI_ENDPROC + .type caml_ml_array_bound_error, %function + .size caml_ml_array_bound_error, .-caml_ml_array_bound_error + + .globl caml_system__code_end +caml_system__code_end: + +/* GC roots for callback */ + + .data + .align 3 + .globl caml_system__frametable +caml_system__frametable: + .quad 1 /* one descriptor */ + .quad .Lcaml_retaddr /* return address into callback */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ + .align 3 + .type caml_system__frametable, %object + .size caml_system__frametable, .-caml_system__frametable diff --git a/asmrun/signals_osdep.h b/asmrun/signals_osdep.h index ff19847..68ec837 100644 --- a/asmrun/signals_osdep.h +++ b/asmrun/signals_osdep.h @@ -92,6 +92,25 @@ #define CONTEXT_YOUNG_PTR (context->uc_mcontext.arm_r8) #define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address) +/****************** ARM64, Linux */ + +#elif defined(TARGET_arm64) && defined(SYS_linux) + + #include + + #define DECLARE_SIGNAL_HANDLER(name) \ + static void name(int sig, siginfo_t * info, ucontext_t * context) + + #define SET_SIGACT(sigact,name) \ + sigact.sa_sigaction = (void (*)(int,siginfo_t *,void *)) (name); \ + sigact.sa_flags = SA_SIGINFO + + typedef unsigned long context_reg; + #define CONTEXT_PC (context->uc_mcontext.pc) + #define CONTEXT_EXCEPTION_POINTER (context->uc_mcontext.regs[26]) + #define CONTEXT_YOUNG_PTR (context->uc_mcontext.regs[27]) + #define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address) + /****************** AMD64, Solaris x86 */ #elif defined(TARGET_amd64) && defined (SYS_solaris) diff --git a/asmrun/stack.h b/asmrun/stack.h index 756db95..031e408 100644 --- a/asmrun/stack.h +++ b/asmrun/stack.h @@ -65,6 +65,11 @@ #define Callback_link(sp) ((struct caml_context *)((sp) + 16)) #endif +#ifdef TARGET_arm64 +#define Saved_return_address(sp) *((intnat *)((sp) - 8)) +#define Callback_link(sp) ((struct caml_context *)((sp) + 16)) +#endif + /* Structure of OCaml callback contexts */ struct caml_context { diff --git a/byterun/interp.c b/byterun/interp.c index b99ed2f..af9fa0f 100644 --- a/byterun/interp.c +++ b/byterun/interp.c @@ -173,6 +173,12 @@ sp is a local copy of the global variable caml_extern_sp. */ #define SP_REG asm("%r14") #define ACCU_REG asm("%r13") #endif +#ifdef __aarch64__ +#define PC_REG asm("%x19") +#define SP_REG asm("%x20") +#define ACCU_REG asm("%x21") +#define JUMPTBL_BASE_REG asm("%x22") +#endif #endif /* Division and modulus madness */ diff --git a/configure b/configure index 9b02664..36edfab 100755 --- a/configure +++ b/configure @@ -657,6 +657,7 @@ if test $withsharedlibs = "yes"; then x86_64-*-netbsd*) natdynlink=true;; i386-*-gnu0.3) natdynlink=true;; arm*-*-linux*) natdynlink=true;; + aarch64-*-linux*) natdynlink=true;; esac fi @@ -715,6 +716,7 @@ case "$host" in x86_64-*-netbsd*) arch=amd64; system=netbsd;; x86_64-*-openbsd*) arch=amd64; system=openbsd;; x86_64-*-darwin*) arch=amd64; system=macosx;; + aarch64-*-linux*) arch=arm64; system=linux;; esac # Some platforms exist both in 32-bit and 64-bit variants, not distinguished @@ -767,7 +769,7 @@ case "$arch,$model,$system" in aspp='gcc -m64 -c';; amd64,*,*) as='as' aspp='gcc -c';; - arm,*,*) as='as'; + arm,*,*|arm64,*,*)as='as'; aspp='gcc -c';; i386,*,solaris) as='as' aspp='/usr/ccs/bin/as -P';; @@ -1193,6 +1195,7 @@ case "$arch" in fi;; power) bng_arch=ppc; bng_asm_level=1;; amd64) bng_arch=amd64; bng_asm_level=1;; + arm64) bng_arch=arm64; bng_asm_level=1;; *) bng_arch=generic; bng_asm_level=0;; esac diff --git a/otherlibs/num/bng.c b/otherlibs/num/bng.c index 5bbedb0..0483ef5 100644 --- a/otherlibs/num/bng.c +++ b/otherlibs/num/bng.c @@ -23,12 +23,10 @@ #include "bng_amd64.c" #elif defined(BNG_ARCH_ppc) #include "bng_ppc.c" -#elif defined (BNG_ARCH_alpha) -#include "bng_alpha.c" #elif defined (BNG_ARCH_sparc) #include "bng_sparc.c" -#elif defined (BNG_ARCH_mips) -#include "bng_mips.c" +#elif defined (BNG_ARCH_arm64) +#include "bng_arm64.c" #endif #endif diff --git a/otherlibs/num/bng_arm64.c b/otherlibs/num/bng_arm64.c new file mode 100644 index 0000000..50843a0 --- /dev/null +++ b/otherlibs/num/bng_arm64.c @@ -0,0 +1,20 @@ +/***********************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */ +/* */ +/* Copyright 2013 Institut National de Recherche en Informatique et */ +/* en Automatique. All rights reserved. This file is distributed */ +/* under the terms of the GNU Library General Public License, with */ +/* the special exception on linking described in file ../../LICENSE. */ +/* */ +/***********************************************************************/ + +/* Code specific for the ARM 64 (AArch64) architecture */ + +#define BngMult(resh,resl,arg1,arg2) \ + asm("mul %0, %2, %3 \n\t" \ + "umulh %1, %2, %3" \ + : "=&r" (resl), "=&r" (resh) \ + : "r" (arg1), "r" (arg2)) diff --git a/testsuite/tests/asmcomp/Makefile b/testsuite/tests/asmcomp/Makefile index fd01d33..9dca023 100644 --- a/testsuite/tests/asmcomp/Makefile +++ b/testsuite/tests/asmcomp/Makefile @@ -126,7 +126,7 @@ parsecmm.mli parsecmm.ml: parsecmm.mly lexcmm.ml: lexcmm.mll @$(OCAMLLEX) -q lexcmm.mll -CASES=fib tak quicksort quicksort2 soli \ +CASES=fib tak quicksort quicksort2 soli integr \ arith checkbound tagged-fib tagged-integr tagged-quicksort tagged-tak ARGS_fib=-DINT_INT -DFUN=fib main.c ARGS_tak=-DUNIT_INT -DFUN=takmain main.c diff --git a/testsuite/tests/asmcomp/arm64.S b/testsuite/tests/asmcomp/arm64.S new file mode 100644 index 0000000..3bb4110 --- /dev/null +++ b/testsuite/tests/asmcomp/arm64.S @@ -0,0 +1,52 @@ +/***********************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */ +/* */ +/* Copyright 2013 Institut National de Recherche en Informatique et */ +/* en Automatique. All rights reserved. This file is distributed */ +/* under the terms of the Q Public License version 1.0. */ +/* */ +/***********************************************************************/ + + .globl call_gen_code + .align 2 +call_gen_code: + /* Set up stack frame and save callee-save registers */ + stp x29, x30, [sp, -160]! + add x29, sp, #0 + stp x19, x20, [sp, 16] + stp x21, x22, [sp, 32] + stp x23, x24, [sp, 48] + stp x25, x26, [sp, 64] + stp x27, x28, [sp, 80] + stp d8, d9, [sp, 96] + stp d10, d11, [sp, 112] + stp d12, d13, [sp, 128] + stp d14, d15, [sp, 144] + /* Shuffle arguments */ + mov x8, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + /* Call generated asm */ + blr x8 + /* Reload callee-save registers and return address */ + ldp x19, x20, [sp, 16] + ldp x21, x22, [sp, 32] + ldp x23, x24, [sp, 48] + ldp x25, x26, [sp, 64] + ldp x27, x28, [sp, 80] + ldp d8, d9, [sp, 96] + ldp d10, d11, [sp, 112] + ldp d12, d13, [sp, 128] + ldp d14, d15, [sp, 144] + ldp x29, x30, [sp], 160 + ret + + .globl caml_c_call + .align 2 +caml_c_call: + br x15 diff --git a/testsuite/tests/asmcomp/main.ml b/testsuite/tests/asmcomp/main.ml index d67a643..82b699e 100644 --- a/testsuite/tests/asmcomp/main.ml +++ b/testsuite/tests/asmcomp/main.ml @@ -13,6 +13,7 @@ open Clflags let compile_file filename = + Clflags.dlcode := false; Compilenv.reset "test"; Emit.begin_assembly(); let ic = open_in filename in -- 1.8.4.2