From c96ba2b2678e4b92e7969c683d4cb3f1648af813 Mon Sep 17 00:00:00 2001
From: Jacques Comeaux <jacquesrcomeaux@protonmail.com>
Date: Sun, 19 May 2024 13:52:10 -0500
Subject: Rework assembler for simplified instruction set

- Remove commas and brackets from syntax
- Rename opcodes for unambiguous instruction encodings
- Redesign parse instruction encoding
- Implement opcode parser
- Add bit-width restriction to octal parser
---
 assembler/assemble.s   | 147 ++++++++++++++-----------
 assembler/get_char.s   |  21 ----
 assembler/instructions | 233 ++++++++++++++++++++++++++++++++++++++++
 assembler/notes        |  35 ------
 assembler/octal.s      |  72 +++++++++++--
 assembler/opcode.s     | 283 +++++++++++++++++++++++++++++++++++++++++++++++++
 assembler/register.s   |  59 +++++------
 assembler/string.s     |  16 ---
 8 files changed, 691 insertions(+), 175 deletions(-)
 delete mode 100644 assembler/get_char.s
 create mode 100644 assembler/instructions
 delete mode 100644 assembler/notes
 create mode 100644 assembler/opcode.s
 delete mode 100644 assembler/string.s

(limited to 'assembler')

diff --git a/assembler/assemble.s b/assembler/assemble.s
index bf31edf..ef3893d 100644
--- a/assembler/assemble.s
+++ b/assembler/assemble.s
@@ -1,71 +1,100 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type assemble, %function
+.global assemble
+
+// TODO:
+// - implement opcode parser
+// - test each instruction (do this later -- much easier)
+// - test 4-bit reg instructions
+// - test BEQ overlaps
+// - decide on additional push or pops
+
 assemble:
-  BL receive_op
-  LDR R5, =0x20002000 // opcode table
-  LDR R1, =0x20001000 // opcode buffer base addr
-loop:
-  LDR R0, [R5, 0]     // load the string addr
-  BL string_compare
-  BEQ match
-  ADDS R5, 8          // next row of table
-  LDR R0, =0x20002080 // opcode table end
-  CMP R5, R0
-  BLO loop            // Keep going if lower than end
-  B redo_line         // redo line if opcode was not found
-match:
-  LDR R6, [R5, 4]     // load the parse instructions
+  // LDR R3, [SP, 20]
+  MOVS R6, 0        // R6: word under construction, starts out all zero
+  LDR R7, =0x00C8E893 // R7: parse instruction, consumed one byte at a time, low byte first (hard-coded here)
+  // LDR R7, =0xE0E3A588
+  MOVS R0, '  // space: the end char while more parse bytes follow
+  MOV R8, R0        // R8: end char that get_char compares against
 main_loop:
-  MOVS R0, 0xFF   // lsb mask
-  ANDS R0, R6     // store in R0
-  MOVS R1, 0x80   // bit 7 mask
-  TST R0, R1      // compare (AND) to R0 byte
-  BNE handle_imm  // if IMM (== 1)
-  LSRS R0, 4      // R0 hold 3 or 4 (or 0)
-  BEQ handle_brackets // if BRACKETS (== 0)
-handle_reg:
-  BL register    // result is put in R4
-  MOVS R0, 0x0F  // lower nibble mask
-  ANDS R0, R6    // store shift amount in R0
-  LSLS R4, R0    // shift the result by the shift amount
-  ORRS R5, R4    // OR the register code into the word under construction
+  LSRS R0, R7, 8    // just peek
+  BNE skip          // if more stuff then skip
+  MOVS R0, '\r
+  MOV R8, R0        //set end char to carriage return
+skip:
+  // MOVS R0, 0xFF     // lsb mask
+  // ANDS R0, R7       // store in R0
+  UXTB R0, R7       // store lsb in R0
+  LSRS R1, R0, 4    // upper nibble
+  CMP R1, 0xC       // if 0xxxxxxx or 10xxxxxx
+  BLO opcode
+  CMP R1, 0xE       // if 110xyyyy
+  BLO handle_imm
+handle_reg:         // if 111xyyyy: x picks a 3- or 4-bit register, yyyy is the shift amount
+  LSLS R0, R7, 27   // move bit 4 up to bit 31, dropping every bit above it
+  LSRS R0, 31       // bring it back down to bit 0 (now 0 or 1, rather than 0 or 16)
+  ADDS R0, 3        // add 3 to it (now 3 or 4), the width `register` expects in R0
+  BL register       // result is put in R4
+  MOVS R0, 0x0F     // lower nibble mask
+  ANDS R0, R7       // store shift amount in R0
+  LSLS R4, R0       // shift the result by the shift amount
+  ORRS R6, R4       // OR the register code into the word under construction
   B done_stuff
+opcode:             // literal opcode field carried in the parse byte itself; no input is read
+  MOVS R2, 9        // shift amount for 7-bit opcode
+  MOVS R1, (1<<7)   // bit 7 mask
+  TST R0, R1        // check bit 7
+  BEQ fin           // if zero done
+  BICS R0, R1       // clear bit 7
+  MOVS R2, 11       // shift amount for 5-bit opcode high
+  MOVS R1, (1<<5)   // bit 5 mask
+  TST R0, R1        // check bit 5
+  BEQ fin           // if zero done
+  BICS R0, R1       // clear bit 5
+  MOVS R2, 6        // shift amount for 5-bit opcode low
+fin:                // R0 = field bits with the tag bits cleared, R2 = bit position of the field
+  LSLS R0, R2       // move the field into place
+  ORRS R6, R0       // OR it into the word under construction
+  B here            // no operand was typed, so skip the end-char echo
 handle_imm:
   MOVS R1, 0x0F  // lower nibble mask
   ANDS R0, R1    // store immediate width in R0
   BL octal       // result is put in R4
-  MOVS R0, 0x7F  // least significant 7 bits mask
-  ANDS R0, R6    // store ls 7 bits in R0
-  LSRS R0, 4     // shift right to get shift amount
+  LSLS R0, R7, 27 // move bit 4 of the parse byte up to bit 31
+  LSRS R0, 31    // and back down to bit 0 (0 or 1)
+  MOVS R2, 6     // a shifted immediate sits at bit 6
+  MULS R0, R2    // R0 has shift amount (0 or 6)
   LSLS R4, R0    // shift the result by the shift amount
+  ORRS R6, R4    // OR the immediate into the word under construction
 done_stuff:
-  LSRS R6, 0x8 // get next parse instruction
-  BEQ done     // if it's zero there are no more things to parse
-  MOVS R0, 0   // copy the end_char into R0
-  ORRS R0, R9
-  BL uart_send // echo the comma (or bracket)
-10:
-  BL get_char
-  MOVS R1, '  // space char
-  CMP R0, R1
-  BNE 10b // keep trying if not space
-  BL uart_send // echo the space
-  B main_loop
-handle_brackets:
-  BL get_char    // char in R0
-  MOVS R1, '[    // open bracket
-  CMP R0, R1
-  BNE handle_brackets // keep trying if not bracket
-  BL uart_send   // echo bracket
-  MOVS R8, 1     // 1 means we are now in brackets
-  LSRS R6, 0x8   // get next parse instruction
-  BNE main_loop
+  MOV R0, R8     // copy the end_char into R0
+  BL uart_send   // echo the space (or carriage return)
+here:
+  LSRS R7, 0x8   // get next parse instruction
+  BNE main_loop  // if it's nonzero there are more things to parse
 done:
-  TST R8, R8 // R8 == whether we are in bracket or not
-  BEQ no_brackets
-  MOVS R0, ']  // echo bracket
-  BL uart_send
-no_bracket:
-  MOVS R0, '\r  // send carriage return
-  BL uart_send
   MOVS R0, '\n  // send newline
   BL uart_send
-  B next_instr
+  MOVS R0, R6    // hand the assembled word to send_hex
+  BL send_hex
+  B assemble     // start over for the next instruction
+
+.type get_char, %function
+.global get_char
+
+// R8: end_char
+get_char:           // receive one char into R0; on return Z is set when it equals the end char in R8
+  PUSH {LR}         // uart_recv is reached with BL, so our own return address must be saved
+  BL uart_recv      // received char arrives in R0
+  CMP R0, 025 // ^U (NAK)
+  BEQ redo_line     // ^U abandons the line being entered
+  // CMP R0, 004 // ^D (EOT)
+  // BEQ done_for_real
+  CMP R0, R8        // callers branch on this comparison; the POP below leaves the flags alone
+  POP {PC}
+redo_line:          // abort path: does not return to the parser that called us
+  POP {R0, R1}      // drop our saved LR and the LR pushed by the calling parser (octal/register/opcode), so no word leaks per ^U
+  B done
diff --git a/assembler/get_char.s b/assembler/get_char.s
deleted file mode 100644
index f1f43e7..0000000
--- a/assembler/get_char.s
+++ /dev/null
@@ -1,21 +0,0 @@
-// R9: end_char
-get_char:
-  PUSH {LR}
-  BL uart_recv
-  MOVS R1, 025 // ^U (NAK)
-  CMP R0, R1
-  BEQ redo_line
-  MOVS R1, 004 // ^D (EOT)
-  CMP R0, R1
-  BEQ done_for_real
-  CMP R0, R9
-  POP {PC}
-
-get_line:
-  BL get_char
-  ...
-  B get_line
-
-redo_line:
-  ...
-  B get_line
diff --git a/assembler/instructions b/assembler/instructions
new file mode 100644
index 0000000..88ebd23
--- /dev/null
+++ b/assembler/instructions
@@ -0,0 +1,233 @@
+Clobber
+R0: arg1, uart result
+R1: arg2
+R2:
+R3: shift_amount
+-------------
+Save
+R4: octal result, register result
+R5: second-level (octal or register) scratch
+R6: word under construction
+R7: parse instructions
+R8: end_char
+R8:
+R9:
+R10:
+R11:
+R12:
+
+Encoding
+
+start with all 0
+
+0xxxxxxx // 7-bit field xxxxxxx at [15:9] (non zero)
+100xxxxx // 5-bit field xxxxx at [15:11]
+101xxxxx // 5-bit field xxxxx at [10:6]
+1100yyyy // octal immediate with shift amount 0 and bit-width yyyy
+1101yyyy // octal immediate with shift amount 6 and bit-width yyyy
+1110yyyy // 3-bit reg with shift amount yyyy
+1111yyyy // 4-bit reg with shift amount yyyy (excluding yyyy=1111=15)
+
+ADCS (register)     T1  01000 00101 Rm Rdn    E3 E0 A5 88
+ADDSI3 (immediate)  T1  0001110 imm3 Rn Rd    D3 E3 E0 0E
+ADDSI8 (immediate)  T2  00110 Rdn imm8        00 C8 E8 86
+ADDSR (register)    T1  0001100 Rm Rn Rd      E6 E3 E0 0C
+ADDRHI (register)   T2  010001001 Rm4 Rdn     F3 E0 B2 88     TODO (dn + 8)
+ADDRLO (register)   T2  010001000 Rm4 Rdn     F3 E0 B0 88
+ADR                 T1  10100 Rd imm8         00 C8 E8 94
+ANDS (register)     T1  01000 00000 Rm Rdn    E3 E0 A0 88
+ASRSI (immediate)   T1  00010 imm5 Rm Rd      D5 E3 E0 82
+ASRSR (register)    T1  01000 00100 Rm Rdn    E3 E0 A4 88
+BEQ                 T1  11010 000 imm8        00 C8 A0 9A     TODO overlap
+BNE                 T1  11010 001 imm8        00 C8 A4 9A
+BHS                 T1  11010 010 imm8        00 C8 A8 9A
+BLO                 T1  11010 011 imm8        00 C8 AC 9A
+BMI                 T1  11010 100 imm8        00 C8 B0 9A
+BPL                 T1  11010 101 imm8        00 C8 B4 9A
+BVS                 T1  11010 110 imm8        00 C8 B8 9A
+BVC                 T1  11010 111 imm8        00 C8 BC 9A
+BHI                 T1  11011 000 imm8        00 C8 A0 9B
+BLS                 T1  11011 001 imm8        00 C8 A4 9B
+BGE                 T1  11011 010 imm8        00 C8 A8 9B
+BLT                 T1  11011 011 imm8        00 C8 AC 9B
+BGT                 T1  11011 100 imm8        00 C8 B0 9B
+BLE                 T1  11011 101 imm8        00 C8 B4 9B
+B                   T2  11100 imm11           00 00 CB 9C
+BICS (register)     T1  01000 01110 Rm Rdn    E3 E0 AE 88
+BLHI                T1  11110 imm11           00 00 CB 9E
+BLLO                T1  11111 imm11           00 00 CB 9F
+BLX                 T1  01000 1111 Rm 000     00 E3 BE 88
+BX                  T1  01000 1110 Rm 000     00 E3 BC 88
+CMN (register)      T1  01000 01011 Rm Rn     E3 E0 AB 88
+CMPI (immediate)    T1  00101 Rn imm8         00 C8 E8 85
+CMPR (register)     T1  01000 01010 Rm Rn     E3 E0 AA 88
+CMPRHI (register)   T2  010001011 Rm4 Rd      F3 E0 B6 88      TODO (d + 8)
+CMPRLO (register)   T2  010001010 Rm4 Rd      F3 E0 B4 88 
+EORS (register)     T1  01000 00001 Rm Rdn    E3 E0 A1 88
+LDRI5 (immediate)   T1  01101 imm5 Rn Rt      D5 E3 E0 8D
+LDRI8 (immediate)   T2  10011 Rt imm8         00 C8 E8 93
+LDRL (literal)      T1  01001 Rt imm8         00 C8 E8 89
+LDRR (register)     T1  0101100 Rm Rn Rt      E6 E3 E0 2C
+LDRBI (immediate)   T1  01111 imm5 Rn Rt      D5 E3 E0 8F
+LDRBR (register)    T1  0101110 Rm Rn Rt      E6 E3 E0 2E
+LDRHI (immediate)   T1  10001 imm5 Rn Rt      D5 E3 E0 91
+LDRHR (register)    T1  0101101 Rm Rn Rt      E6 E3 E0 2D
+LDRSB (register)    T1  0101011 Rm Rn Rt      E6 E3 E0 2B
+LDRSH (register)    T1  0101111 Rm Rn Rt      E6 E3 E0 2F
+LSLSI (immediate)   T1  00000 imm5 Rm Rd      D5 E3 E0 80
+LSLSR (register)    T1  01000 00010 Rm Rdn    E3 E0 A2 88
+LSRSI (immediate)   T1  00001 imm5 Rm Rd      D5 E3 E0 81
+LSRSR (register)    T1  01000 00011 Rm Rdn    E3 E0 A3 88
+MOVSI (immediate)   T1  00100 Rd imm8         00 C8 E8 84
+MOVRHI (register)   T1  010001101 Rm4 Rd      F3 E0 BA 88     TODO (d + 8)
+MOVRLO (register)   T1  010001100 Rm4 Rd      F3 E0 B8 88
+MOVSR (register)    T2  00000 00000 Rm Rd     E3 E0 A0 80
+MULS                T1  01000 01101 Rn Rdm    E3 E0 AD 88
+MVNS (register)     T1  01000 01111 Rm Rd     E3 E0 AF 88
+ORRS (register)     T1  01000 01100 Rm Rdn    E3 E0 AC 88
+PUSHLR              T1  10110 10100 000000    00 00 B4 96     TODO nothing to do
+POPPC               T1  10111 10100 000000    00 00 B4 97
+REV                 T1  10111 01000 Rm Rd     E3 E0 A8 97
+REV16               T1  10111 01001 Rm Rd     E3 E0 A9 97
+REVSH               T1  10111 01011 Rm Rd     E3 E0 AB 97
+RORS (register)     T1  01000 00111 Rm Rdn    E3 E0 A7 88
+NEG (immediate)     T1  01000 01001 Rn Rd     E3 E0 A9 88     TODO it's negate
+SBCS (register)     T1  01000 00110 Rm Rdn    E3 E0 A6 88
+STRI5 (immediate)   T1  01100 imm5 Rn Rt      D5 E3 E0 8C
+STRI8 (immediate)   T2  10010 Rt imm8         00 C8 E8 92
+STRR (register)     T1  0101000 Rm Rn Rt      E6 E3 E0 28
+STRBI (immediate)   T1  01110 imm5 Rn Rt      D5 E3 E0 8E
+STRBR (register)    T1  0101010 Rm Rn Rt      E6 E3 E0 2A
+STRHI (immediate)   T1  10000 imm5 Rn Rt      D5 E3 E0 90
+STRHR (register)    T1  0101001 Rm Rn Rt      E6 E3 E0 29
+SUBSI3 (immediate)  T1  0001111 imm3 Rn Rd    D3 E3 E0 0F
+SUBSI8 (immediate)  T2  00111 Rdn imm8        00 C8 E8 87
+SUBSR (register)    T1  0001101 Rm Rn Rd      E6 E3 E0 0D
+SVC                 T1  11011 111 imm8        00 C8 BC 9B
+SXTB                T1  10110 01001 Rm Rd     E3 E0 A9 96
+SXTH                T1  10110 01000 Rm Rd     E3 E0 A8 96
+TST (register)      T1  01000 01000 Rm Rd     E3 E0 A8 88
+UXTB                T1  10110 01011 Rm Rd     E3 E0 AB 96
+UXTH                T1  10110 01010 Rm Rd     E3 E0 AA 96
+
+A for ARITHMETIC 
+B for BITWISE
+C for COMPARE
+D for DUPLICATE
+J for JUMP
+L for LOAD
+P for PUSH or POP
+R for ROTATE or REVERSE
+S for STORE
+T for TRANSLATE
+
+A for ARITHMETIC 
+ 
+AAC     ADCS (register)     T1  01000 00101 Rm Rdn    E3 E0 A5 88
+AAI3    ADDSI3 (immediate)  T1  0001110 imm3 Rn Rd    D3 E3 E0 0E
+AAI8    ADDSI8 (immediate)  T2  00110 Rdn imm8        00 C8 E8 86
+AARF    ADDSR (register)    T1  0001100 Rm Rn Rd      E6 E3 E0 0C
+AARH    ADDRHI (register)   T2  010001001 Rm4 Rdn     F3 E0 B2 88
+AARL    ADDRLO (register)   T2  010001000 Rm4 Rdn     F3 E0 B0 88
+AAA     ADR                 T1  10100 Rd imm8         00 C8 E8 94
+ASC     SBCS (register)     T1  01000 00110 Rm Rdn    E3 E0 A6 88
+ASI3    SUBSI3 (immediate)  T1  0001111 imm3 Rn Rd    D3 E3 E0 0F
+ASI8    SUBSI8 (immediate)  T2  00111 Rdn imm8        00 C8 E8 87
+ASR     SUBSR (register)    T1  0001101 Rm Rn Rd      E6 E3 E0 0D
+AHI     ASRSI (immediate)   T1  00010 imm5 Rm Rd      D5 E3 E0 82
+AHR     ASRSR (register)    T1  01000 00100 Rm Rdn    E3 E0 A4 88
+AM      MULS                T1  01000 01101 Rn Rdm    E3 E0 AD 88
+AN      NEG (immediate)     T1  01000 01001 Rn Rd     E3 E0 A9 88
+
+J for JUMP
+
+JE      BEQ                 T1  11010 000 imm8        00 C8 A0 9A
+JN      BNE                 T1  11010 001 imm8        00 C8 A4 9A
+JHS     BHS                 T1  11010 010 imm8        00 C8 A8 9A
+JLO     BLO                 T1  11010 011 imm8        00 C8 AC 9A
+JM      BMI                 T1  11010 100 imm8        00 C8 B0 9A
+JP      BPL                 T1  11010 101 imm8        00 C8 B4 9A
+JVS     BVS                 T1  11010 110 imm8        00 C8 B8 9A
+JVC     BVC                 T1  11010 111 imm8        00 C8 BC 9A
+JHI     BHI                 T1  11011 000 imm8        00 C8 A0 9B
+JLS     BLS                 T1  11011 001 imm8        00 C8 A4 9B
+JGE     BGE                 T1  11011 010 imm8        00 C8 A8 9B
+JLT     BLT                 T1  11011 011 imm8        00 C8 AC 9B
+JGT     BGT                 T1  11011 100 imm8        00 C8 B0 9B
+JLE     BLE                 T1  11011 101 imm8        00 C8 B4 9B
+JA      B                   T2  11100 imm11           00 00 CB 9C
+JIH     BLHI                T1  11110 imm11           00 00 CB 9E
+JIL     BLLO                T1  11111 imm11           00 00 CB 9F
+JLR     BLX                 T1  01000 1111 Rm 000     00 E3 BE 88
+JR      BX                  T1  01000 1110 Rm 000     00 E3 BC 88
+JS      SVC                 T1  11011 111 imm8        00 C8 BC 9B
+
+C for COMPARE
+
+CN      CMN (register)      T1  01000 01011 Rm Rn     E3 E0 AB 88
+CI      CMPI (immediate)    T1  00101 Rn imm8         00 C8 E8 85
+CR3     CMPR (register)     T1  01000 01010 Rm Rn     E3 E0 AA 88
+CR4H    CMPRHI (register)   T2  010001011 Rm4 Rd      F3 E0 B6 88
+CR4L    CMPRLO (register)   T2  010001010 Rm4 Rd      F3 E0 B4 88 
+
+L for LOAD
+
+LI5     LDRI5 (immediate)   T1  01101 imm5 Rn Rt      D5 E3 E0 8D
+LI8     LDRI8 (immediate)   T2  10011 Rt imm8         00 C8 E8 93
+LL      LDRL (literal)      T1  01001 Rt imm8         00 C8 E8 89
+LR      LDRR (register)     T1  0101100 Rm Rn Rt      E6 E3 E0 2C
+LBI     LDRBI (immediate)   T1  01111 imm5 Rn Rt      D5 E3 E0 8F
+LBR     LDRBR (register)    T1  0101110 Rm Rn Rt      E6 E3 E0 2E
+LHI     LDRHI (immediate)   T1  10001 imm5 Rn Rt      D5 E3 E0 91
+LHR     LDRHR (register)    T1  0101101 Rm Rn Rt      E6 E3 E0 2D
+LSB     LDRSB (register)    T1  0101011 Rm Rn Rt      E6 E3 E0 2B
+LSH     LDRSH (register)    T1  0101111 Rm Rn Rt      E6 E3 E0 2F
+
+S for STORE
+
+SI5     STRI5 (immediate)   T1  01100 imm5 Rn Rt      D5 E3 E0 8C
+SI8     STRI8 (immediate)   T2  10010 Rt imm8         00 C8 E8 92
+SR      STRR (register)     T1  0101000 Rm Rn Rt      E6 E3 E0 28
+SBI     STRBI (immediate)   T1  01110 imm5 Rn Rt      D5 E3 E0 8E
+SBR     STRBR (register)    T1  0101010 Rm Rn Rt      E6 E3 E0 2A
+SHI     STRHI (immediate)   T1  10000 imm5 Rn Rt      D5 E3 E0 90
+SHR     STRHR (register)    T1  0101001 Rm Rn Rt      E6 E3 E0 29
+
+D for DUPLICATE
+
+DI      MOVSI (immediate)   T1  00100 Rd imm8         00 C8 E8 84
+DRH     MOVRHI (register)   T1  010001101 Rm4 Rd      F3 E0 BA 88
+DRL     MOVRLO (register)   T1  010001100 Rm4 Rd      F3 E0 B8 88
+DRF     MOVSR (register)    T2  00000 00000 Rm Rd     E3 E0 A0 80
+DSB     SXTB                T1  10110 01001 Rm Rd     E3 E0 A9 96
+DSH     SXTH                T1  10110 01000 Rm Rd     E3 E0 A8 96
+DUB     UXTB                T1  10110 01011 Rm Rd     E3 E0 AB 96
+DUH     UXTH                T1  10110 01010 Rm Rd     E3 E0 AA 96
+
+B for BITWISE
+
+BC      BICS (register)     T1  01000 01110 Rm Rdn    E3 E0 AE 88
+BA      ANDS (register)     T1  01000 00000 Rm Rdn    E3 E0 A0 88
+BX      EORS (register)     T1  01000 00001 Rm Rdn    E3 E0 A1 88
+BO      ORRS (register)     T1  01000 01100 Rm Rdn    E3 E0 AC 88
+BI      MVNS (register)     T1  01000 01111 Rm Rd     E3 E0 AF 88
+BT      TST (register)      T1  01000 01000 Rm Rd     E3 E0 A8 88
+
+T for TRANSLATE
+
+TLI     LSLSI (immediate)   T1  00000 imm5 Rm Rd      D5 E3 E0 80
+TLR     LSLSR (register)    T1  01000 00010 Rm Rdn    E3 E0 A2 88
+TRI     LSRSI (immediate)   T1  00001 imm5 Rm Rd      D5 E3 E0 81
+TRR     LSRSR (register)    T1  01000 00011 Rm Rdn    E3 E0 A3 88
+
+R for ROTATE or REVERSE
+
+RR      RORS (register)     T1  01000 00111 Rm Rdn    E3 E0 A7 88
+RBW     REV                 T1  10111 01000 Rm Rd     E3 E0 A8 97
+RBH     REV16               T1  10111 01001 Rm Rd     E3 E0 A9 97
+RBS     REVSH               T1  10111 01011 Rm Rd     E3 E0 AB 97
+
+P for PUSH or POP
+
+PL      PUSHLR              T1  10110 10100 000000    00 00 B4 96
+PP      POPPC               T1  10111 10100 000000    00 00 B4 97
diff --git a/assembler/notes b/assembler/notes
deleted file mode 100644
index 6206eff..0000000
--- a/assembler/notes
+++ /dev/null
@@ -1,35 +0,0 @@
-Clobber
-R0: arg1, uart result
-R1: arg2
-R2:
-R3: shift_amount
--------------
-Save
-R4: octal result, register result
-R5: word under construction
-R6: parse instructions
-R8:
-R9: end_char
-
-1yyyxxxx Imm         yyy = shift amount xxxx = bit-width
-00000001 Brackets
-0011yyyy Reg        yyyy = shift amount
-0100yyyy Reg
-
-MOVS 0x 00 00 88 38
-ADDS 0x 00 00 88 38
-SUBS 0x 00 00 88 38
-CMP  0x 00 00 33 30
-ANDS 0x 00 00 33 30
-ORRS 0x 00 00 33 30
-TST  0x 00 00 33 30
-LSLS 0x 00 E5 33 30
-LSRS 0x 00 E5 33 30
-RORS 0x 00 00 33 30
-LDR  0x E5 33 01 30
-LDRB 0x 36 33 01 30
-STR  0x E5 33 01 30
-STRB 0x 36 33 01 30
-B<c> 0x 00 00 00 88
-B    0x 00 00 00 8B
-BX   0x 00 00 00 43
diff --git a/assembler/octal.s b/assembler/octal.s
index 7382ca4..18abebd 100644
--- a/assembler/octal.s
+++ b/assembler/octal.s
@@ -1,25 +1,75 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type octal, %function
+.global octal
+
 octal:
   PUSH {LR}
+  MOVS R5, R0       // R5 = bit-width still available; MOVS also sets Z for the branch below
+
+  // Bit-width 0: only a single '0 is accepted, so the result is always 0
+  BNE 30f
 10:
   BL get_char
-  MOVS R1, '0
-  CMP R0, R1
+  CMP R0, '0
   BNE 10b
   BL uart_send
   MOVS R4, 0
 20:
   BL get_char
-  BEQ 30f
-  MOVS R1, '0
-  CMP R0, R1
-  BLO 20b
-  MOVS R1, '7
-  CMP R0, R1
-  BHI 20b
+  BNE 20b           // get_char returns with Z set only for the end char (R8)
+  POP {PC}
+30:
+
+  // R4 will become '1 or '3 or '7: the largest first digit that fits in min(bit-width, 3) bits
+  MOVS R0, 3
+  CMP R5, 3
+  BHS 40f
+  MOVS R0, R5       // narrower than 3 bits: use the width itself
+40:
+  MOVS R4, 1
+  LSLS R4, R0
+  ADDS R4, ('0 - 1) // (1 << n) - 1 expressed as an ASCII digit
+
+  // Get first char (at least one digit is required; the end char is not checked here)
+50:
+  BL get_char
+  CMP R0, '0
+  BLO 50b
+  CMP R0, R4        // reject digits too wide for the field
+  BHI 50b
+  BL uart_send
+  SUBS R0, '0
+  MOVS R4, R0       // R4 accumulates the value
+
+  // Subtract 1, 2, or 3 from bit-width: the bits the first digit occupies (a '0 counts as 1)
+  MOVS R1, 0
+60:
+  ADDS R1, 1
+  LSRS R0, 1        // count shifts until the digit is zero
+  BNE 60b
+  SUBS R5, R1       // R5 = bits left for the remaining digits
+
+  // Loop for remaining chars: every further digit needs 3 more bits
+70:
+  CMP R5, 3
+  BLO 80f           // fewer than 3 bits left: no further digit fits
+  BL get_char
+  BEQ 90f           // end char: the number is finished
+  CMP R0, '0
+  BLO 70b
+  CMP R0, '7
+  BHI 70b
   BL uart_send
   SUBS R0, '0
   LSLS R4, 3
   ADDS R4, R0
-  B 20b
-30:
+  SUBS R5, 3        // a full octal digit uses 3 bits
+  B 70b
+80:                 // width used up: discard input until the end char arrives
+  BL get_char
+  BNE 80b
+90:
   POP {PC}
diff --git a/assembler/opcode.s b/assembler/opcode.s
new file mode 100644
index 0000000..f6e93c4
--- /dev/null
+++ b/assembler/opcode.s
@@ -0,0 +1,283 @@
+// choice encoding:
+
+// end of choices
+//      3  2  1  0
+//   |-------------
+// 0 | __ __ __ 00 
+// 4 | __ __ __ __
+
+// parse instruction
+//      3  2  1  0
+//   |-------------
+// 0 | __ __ 00 AA      // AA == ascii char (non-zero)
+// 4 | XX XX XX XX      // XX XX XX XX == parse instruction
+
+// new choice offset
+//      3  2  1  0
+//   |-------------
+// 0 | __ __ YY AA      // AA == ascii char (YY == non zero)
+// 4 | XX XX XX XX      // XX XX == new address
+
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type opcode, %function
+.global opcode
+
+// R4: base of the current choice table; on return, the matched parse instruction
+// R5: choice pointer, stepping through the table's 8-byte entries (R0, R1: scratch)
+opcode:
+  PUSH {LR}
+  ADR R4, start     // begin at the top-level choice table
+get_match:
+  BL get_char       // next typed char in R0
+  MOV R5, R4        // reset choice pointer
+  B first_time      // don't increment pointer on first time
+next_choice:
+  ADDS R5, 8        // increment choice pointer
+first_time:
+  LDRB R1, [R5]     // load char
+  TST R1, R1        // test if char is zero
+  BEQ get_match     // if run out of options, get a new char
+  CMP R0, R1        // check if input matches char
+  BNE next_choice   // if not match try next option
+  BL uart_send      // echo the char on a match
+  LDR R4, [R5, 4]   // load parse instruction or offset
+  LDRB R1, [R5, 1]  // load parse instruction vs offset byte
+  TST R1, R1        // check if zero
+  BNE get_match     // non-zero means it's an address
+  POP {PC}          // zero means it's a parse instruction
+
+.align 4
+
+start:              // top-level choices (first letter); entry = char, kind byte (0: parse instruction, else table address), pad, 32-bit payload
+  .byte 'A, 0x01 ; .hword 0x0000 ; .word A
+  // .byte 'B, 0x00 ; .hword 0x0000 ; .word 0xE3E0AE88 // BICS (register)     T1  01000 01110 Rm Rdn
+  // .byte 'C, 0x00 ; .hword 0x0000 ; .word 0xE3E0AB88 // CMN (register)      T1  01000 01011 Rm Rn
+  // .byte 'D, 0x00 ; .hword 0x0000 ; .word 0x00C8E884 // MOVSI (immediate)   T1  00100 Rd imm8
+  // .byte 'J, 0x00 ; .hword 0x0000 ; .word 0x00C8A09A // BEQ                 T1  11010 000 imm8
+  // .byte 'L, 0x00 ; .hword 0x0000 ; .word 0xD5E3E08D // LDRI5 (immediate)   T1  01101 imm5 Rn Rt
+  // .byte 'P, 0x00 ; .hword 0x0000 ; .word 0x0000B496 // PUSHLR              T1  10110 10100 000000
+  // .byte 'R, 0x00 ; .hword 0x0000 ; .word 0xE3E0A788 // RORS (register)     T1  01000 00111 Rm Rdn
+  // .byte 'S, 0x00 ; .hword 0x0000 ; .word 0xD5E3E08C // STRI5 (immediate)   T1  01100 imm5 Rn Rt
+  // .byte 'T, 0x00 ; .hword 0x0000 ; .word 0xD5E3E080 // LSLSI (immediate)   T1  00000 imm5 Rm Rd
+  .word 0x00000000, 0x00000000
+
+A:                  // choices after "A"; the all-zero entry ends the table
+  .byte 'A, 0x01 ; .hword 0x0000 ; .word AA
+  // .byte 'H, 0x01 ; .hword 0x0000 ; .word AH
+  .byte 'M, 0x00 ; .hword 0x0000 ; .word 0xE3E0AD88 // MULS                T1  01000 01101 Rn Rdm
+  .byte 'N, 0x00 ; .hword 0x0000 ; .word 0xE3E0A988 // NEG (immediate)     T1  01000 01001 Rn Rd
+  // .byte 'S, 0x01 ; .hword 0x0000 ; .word AS
+  .word 0x00000000, 0x00000000
+
+AA:                 // choices after "AA"; ADR keeps Rd in bits [10:8], so its register byte is E8 (3-bit reg, shift 8)
+  .byte 'A, 0x00 ; .hword 0x0000 ; .word 0x00C8E894 // ADR                 T1  10100 Rd imm8
+  .byte 'C, 0x00 ; .hword 0x0000 ; .word 0xE3E0A588 // ADCS (register)     T1  01000 00101 Rm Rdn
+  .byte 'I, 0x01 ; .hword 0x0000 ; .word AAI
+  .byte 'R, 0x01 ; .hword 0x0000 ; .word AAR
+  .word 0x00000000, 0x00000000
+
+AAI:                // choices after "AAI"; ADDSI8 keeps Rdn in bits [10:8], so its register byte is E8 (3-bit reg, shift 8)
+  .byte '3, 0x00 ; .hword 0x0000 ; .word 0xD3E3E00E // ADDSI3 (immediate)  T1  0001110 imm3 Rn Rd
+  .byte '8, 0x00 ; .hword 0x0000 ; .word 0x00C8E886 // ADDSI8 (immediate)  T2  00110 Rdn imm8
+  .word 0x00000000, 0x00000000
+
+AAR:                // choices after "AAR": F = 3-bit register form, H/L = 4-bit Rm forms; the all-zero entry ends the table
+  .byte 'F, 0x00 ; .hword 0x0000 ; .word 0xE6E3E00C // ADDSR (register)    T1  0001100 Rm Rn Rd
+  .byte 'H, 0x00 ; .hword 0x0000 ; .word 0xF3E0B288 // ADDRHI (register)   T2  010001001 Rm4 Rdn
+  .byte 'L, 0x00 ; .hword 0x0000 ; .word 0xF3E0B088 // ADDRLO (register)   T2  010001000 Rm4 Rdn
+  .word 0x00000000, 0x00000000
+
+// ASC     SBCS (register)     T1  01000 00110 Rm Rdn    E3 E0 A6 88
+// ASI3    SUBSI3 (immediate)  T1  0001111 imm3 Rn Rd    D3 E3 E0 0F
+// ASI8    SUBSI8 (immediate)  T2  00111 Rdn imm8        00 C8 E8 87
+// ASR     SUBSR (register)    T1  0001101 Rm Rn Rd      E6 E3 E0 0D
+
+// AHI     ASRSI (immediate)   T1  00010 imm5 Rm Rd      D5 E3 E0 82
+// AHR     ASRSR (register)    T1  01000 00100 Rm Rdn    E3 E0 A4 88
+
+// Choice 2
+// AH
+// --I
+// --R
+
+// Choice 3
+// AS
+// --C
+// --I
+// --R
+
+// Choice 6
+// B 
+// -A
+// -C
+// -I
+// -O
+// -T
+// -X
+
+// Choice 3
+// C
+// -I
+// -N
+// -R
+
+// Choice 2
+// CR
+// --3
+// --4
+
+// Choice 2
+// CR4
+// ---H
+// ---L
+
+// Choice 4
+// D
+// -I
+// -R
+// -S
+// -U
+
+// Choice 3
+// DR
+// --F
+// --H
+// --L
+
+// Choice 2
+// DS
+// --B
+// --H
+
+// Choice 2
+// DU
+// --B
+// --H
+
+// Choice 12
+// J
+// -A
+// -E
+// -G
+// -H
+// -I
+// -L
+// -M
+// -N
+// -P
+// -R
+// -S
+// -V
+
+// Choice 2
+// JG
+// --E
+// --T
+
+// Choice 2
+// JH
+// --I
+// --S
+
+// Choice 2
+// JI
+// --H
+// --L
+
+// Choice 5
+// JL
+// --E
+// --O
+// --R
+// --S
+// --T
+
+// Choice 2
+// JV
+// --C
+// --S
+
+// Choice 6
+// L
+// -B
+// -H
+// -I
+// -L
+// -R
+// -S
+
+// Choice 2
+// LB
+// --I
+// --R
+
+// Choice 2
+// LH
+// --I
+// --R
+
+// Choice 2
+// LI
+// --5
+// --8
+
+// Choice 2
+// LS
+// --B
+// --H
+
+// Choice 2
+// P
+// -L
+// -P
+
+// Choice 2
+// R
+// -B
+// -R
+
+// Choice 3
+// RB
+// --H
+// --S
+// --W
+
+// Choice 4
+// S
+// -B
+// -H
+// -I
+// -R
+
+// Choice 2
+// SB
+// --I
+// --R
+
+// Choice 2
+// SH
+// --I
+// --R
+
+// Choice 2
+// SI
+// --5
+// --8
+
+// Choice 2
+// T
+// -L
+// -R
+
+// Choice 2
+// TL
+// --I
+// --R
+
+// Choice 2
+// TR
+// --I
+// --R
diff --git a/assembler/register.s b/assembler/register.s
index dd0e1ae..348d8cf 100644
--- a/assembler/register.s
+++ b/assembler/register.s
@@ -1,62 +1,55 @@
-register4:
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type register, %function
+.global register
+
+register:           // parse "R<n>" from the UART; R0 = register field width (3 or 4 bits), result in R4
   PUSH {LR}
+  MOV R4, R0         // start from the field width
+  LSLS R4, 1
+  ADDS R4, ('0 + 1)  // R4 = largest first digit: '7 for width 3, '9 for width 4
 10:
   BL get_char
   MOVS R1, 'R
-  CMPS R0, R1
+  CMP R0, R1
   BNE 10b
+  BL uart_send       // echo the R
 20:
   BL get_char
   MOVS R1, '0
-  CMPS R0, R1
+  CMP R0, R1
   BLO 20b
-  MOVS R1, '9
-  CMPS R0, R1
+  CMP R0, R4         // reject digits above the limit computed at entry
   BHI 20b
+  BL uart_send       // echo the first digit
   MOVS R1, '1
-  CMPS R0, R1
+  CMP R0, R1
   BNE 30f
+  MOVS R1, '7
+  CMP R4, R1
+  BEQ 30f            // limit '7 means a 3-bit register: no second digit can follow the 1
 50:
   BL get_char
+  // get_char has already compared R0 with the end char (R8), so Z is valid here
   BEQ 60f
   MOVS R1, '0
-  CMPS R0, R1
+  CMP R0, R1
   BLO 50b
   MOVS R1, '5
-  CMPS R0, R1
+  CMP R0, R1
   BHI 50b
+  BL uart_send       // echo the second digit
   ADDS R0, 10
 30:
   SUBS R0, '0
-  MOVS R4, 0
-  ORRS R4, R0
+  MOV R4, R0         // R4 = register number
 40:
   BL get_char
+  // wait for the end char: get_char has already compared R0 with R8
   BNE 40b
   POP {PC}
 60:
   MOVS R4, 1
   POP {PC}
-
-register3:
-  PUSH {LR}
-10:
-  BL get_char
-  MOVS R1, 'R
-  CMPS R0, R1
-  BNE 10b
-20:
-  BL get_char
-  MOVS R1, '0
-  CMPS R0, R1
-  BLO 20b
-  MOVS R1, '7
-  CMPS R0, R1
-  BHI 20b
-30:
-  SUBS R0, '0
-  MOVS R4, 0
-  ORRS R4, R0
-  BL get_char
-  BNE 40b
-  POP {PC}
diff --git a/assembler/string.s b/assembler/string.s
deleted file mode 100644
index 0ed7ca3..0000000
--- a/assembler/string.s
+++ /dev/null
@@ -1,16 +0,0 @@
-// R0 : string1 address
-// R1 : string2 address
-// Result in R0
-string_compare:
-  MOVS R4, 0
-loop:
-  LDRB R2, [R0, R4]
-  LDRB R3, [R1, R4]
-  CMP R2, R3
-  BNE done
-  CMP R2, 0
-  BEQ done
-  ADDS R4, 1
-  B loop
-done:
-  BX LR
-- 
cgit v1.2.3