From 80d8f3ae48255f786bd4d52a1819ea0c339f6946 Mon Sep 17 00:00:00 2001
From: Jacques Comeaux
Date: Fri, 23 Aug 2024 19:46:17 -0500
Subject: Add register parser and binary search for opcodes

---
 newasm/Makefile             |  8 +++++++-
 newasm/input.s              |  1 +
 newasm/label.s              | 30 ------------------------------
 newasm/main.s               | 23 ++++++++++++-----------
 newasm/opcode.s             | 40 ----------------------------------------
 newasm/optable.s            | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 newasm/parsers/decimal.s    | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 newasm/parsers/label.s      | 30 ++++++++++++++++++++++++++++++
 newasm/parsers/opcode.s     | 40 ++++++++++++++++++++++++++++++++++++++++
 newasm/parsers/register.s   | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 newasm/parsers/symbol.s     | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 newasm/parsers/whitespace.s | 23 +++++++++++++++++++++++
 newasm/statement.s          | 17 +++++++++++++----
 newasm/string.s             |  6 ++++--
 newasm/symbol.s             | 48 ------------------------------------------------
 newasm/whitespace.s         | 21 ---------------------
 16 files changed, 353 insertions(+), 157 deletions(-)
 delete mode 100644 newasm/label.s
 delete mode 100644 newasm/opcode.s
 create mode 100644 newasm/optable.s
 create mode 100644 newasm/parsers/decimal.s
 create mode 100644 newasm/parsers/label.s
 create mode 100644 newasm/parsers/opcode.s
 create mode 100644 newasm/parsers/register.s
 create mode 100644 newasm/parsers/symbol.s
 create mode 100644 newasm/parsers/whitespace.s
 delete mode 100644 newasm/symbol.s
 delete mode 100644 newasm/whitespace.s

diff --git a/newasm/Makefile b/newasm/Makefile
index 7b31eb4..fd057fe 100644
--- a/newasm/Makefile
+++ b/newasm/Makefile
@@ -11,7 +11,13 @@ parse.bin: parse.elf
 	arm-none-eabi-objcopy -O binary parse.elf parse.bin
 
 objects = main.o uart.o data.o string.o input.o \
-          statement.o whitespace.o label.o symbol.o opcode.o
+          statement.o optable.o \
+          parsers/whitespace.o \
+          parsers/label.o \
+          parsers/symbol.o \
+          parsers/opcode.o \
+          parsers/register.o \
+          parsers/decimal.o
 
 parse.elf: pico_bin.ld $(objects)
 	arm-none-eabi-ld -T pico_bin.ld -o parse.elf $(objects)
diff --git a/newasm/input.s b/newasm/input.s
index f3db1e9..7644a81 100644
--- a/newasm/input.s
+++ b/newasm/input.s
@@ -8,6 +8,7 @@
 
 getline:  PUSH    {R4, R5, LR}          // save registers
           MOVS    R4, R0                // copy buffer start address
+          MOVS    R5, 0                 // beginning offset
 next:     BL      uart_recv             // get a char
           CMP     R0, 0x03              // end of text (^C)
           BEQ     cancel                // don't submit, start on next line
diff --git a/newasm/label.s b/newasm/label.s
deleted file mode 100644
index af2f641..0000000
--- a/newasm/label.s
+++ /dev/null
@@ -1,30 +0,0 @@
-.syntax unified
-.cpu cortex-m0plus
-.thumb
-
-.type label, %function
-.global label
-
-// 1 unexpected begin char
-// 2 doesn't end with colon
-
-// R1 input buffer
-// R2 output buffer
-
-label:    PUSH    {LR}
-          LDRB    R0, [R1]              // get a char
-          CMP     R0, 0x61              // a
-          BLO     1f
-          CMP     R0, 0x7A              // z
-          BLS     2f
-1:        MOVS    R0, #1                // return code 1 (expected lowercase)
-          POP     {PC}
-2:        BL      symbol
-          LDRB    R0, [R1]              // get a char
-          CMP     R0, ':                // colon
-          BEQ     3f
-          MOVS    R0, #2                // return code 2 (expected colon)
-          POP     {PC}
-3:        ADDS    R1, 1                 // consume the colon
-          MOVS    R0, #0                // return code 0 (success)
-          POP     {PC}
diff --git a/newasm/main.s b/newasm/main.s
index 57a1b98..a27fec9 100644
--- a/newasm/main.s
+++ b/newasm/main.s
@@ -5,33 +5,34 @@
 .type main, %function
 .global main, strbuf
 
-main:     LDR     R4, =0x20002000
+main:     LDR     R5, =0x20002000
           BL      uart_recv
 loop:     BL      prompt
           LDR     R0, inpbuf
           BL      getline
           LDR     R0, inpbuf
           BL      putstrln
-          LDR     R1, inpbuf
+          LDR     R4, inpbuf
           LDR     R2, strbuf
           MOVS    R0, 0
           STRB    R0, [R2]
           BL      statement
           BNE     bad
+
 good:     ADR     R0, success
           PUSH    {R1}
           BL      putstrln
-          POP     {R0}
-          BL      putstrln
-          LDR     R0, strbuf
+          MOVS    R0, R4
           BL      putstrln
+          POP     {R0}
+          BL      send_hex
+          LDR     R0, =crlf
+          BL      putstr
           B       loop
+
 bad:      ADR     R0, fail
-          PUSH    {R1}
-          BL      putstrln
-          POP     {R0}
           BL      putstrln
-          LDR     R0, strbuf
+          MOVS    R0, R4
           BL      putstrln
           B       loop
 never:    BL      uart_recv
@@ -39,11 +40,11 @@ never: BL uart_recv
           BX      R0
 
 prompt:   PUSH    {LR}
-          MOVS    R0, R4
+          MOVS    R0, R5
           BL      send_hex
           MOVS    R0, ' 
           BL      uart_send
-          LDR     R0, [R4]
+          LDR     R0, [R5]
           BL      send_hex
           MOVS    R0, ' 
           BL      uart_send
diff --git a/newasm/opcode.s b/newasm/opcode.s
deleted file mode 100644
index 71a5201..0000000
--- a/newasm/opcode.s
+++ /dev/null
@@ -1,40 +0,0 @@
-.syntax unified
-.cpu cortex-m0plus
-.thumb
-
-.type opcode, %function
-.global opcode
-
-// 1 unexpected first char
-
-// R1 input buffer
-// R2 output buffer
-
-opcode:   PUSH    {LR}
-          LDRB    R0, [R1]              // get a char
-          CMP     R0, 0x41              // A
-          BLO     1f
-          CMP     R0, 0x5A              // Z
-          BLS     2f
-1:        MOVS    R0, #1                // unexpected char
-          POP     {PC}
-2:        ADDS    R1, 1                 // consume the character
-          STRB    R0, [R2]              // store in temp buffer
-          ADDS    R2, 1                 // advance temp buffer pointer
-          LDRB    R0, [R1]              // get another character
-          BL      goodchar              // check if valid symbol char
-          BEQ     2b                    // if so keep getting chars
-          MOVS    R0, #0                // return code success
-          STRB    R0, [R2]              // write null byte
-          POP     {PC}
-
-goodchar: CMP     R0, '0
-          BLO     bad
-          CMP     R0, '9
-          BLS     good
-          CMP     R0, 'A
-          BLO     bad
-          CMP     R0, 'Z
-          BHI     bad
-good:     CMP     R0, R0
-bad:      BX      LR
diff --git a/newasm/optable.s b/newasm/optable.s
new file mode 100644
index 0000000..a063f02
--- /dev/null
+++ b/newasm/optable.s
@@ -0,0 +1,67 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type lookup, %function
+.global lookup
+
+// Binary search of op_table for a four-character opcode.
+//
+// input:
+// R0 opcode characters packed in one word (as a word load of the text gives)
+//
+// output:
+// R0 return code, Z flag set exactly when R0 is 0
+//    0 found
+//    1 not found
+// R1 instruction template (when found)
+// R2 instruction type (when found)
+// R3 special code (when found)
+//
+// R1-R3 are overwritten even when the opcode is not found.
+
+.equ OP_COUNT, 7                        // number of rows in op_table
+.equ ROW_SHIFT, 3                       // rows are 8 bytes: index << 3 = offset
+
+lookup:   PUSH    {R4, R5, R6}
+          ADR     R4, op_table          // op table base address
+          MOVS    R5, #0                // begin index (inclusive)
+          MOVS    R6, #OP_COUNT         // end index (exclusive)
+loop:     CMP     R5, R6                // if begin == end then range is empty
+          BEQ     error
+          ADDS    R2, R5, R6            // get sum
+          LSRS    R2, R2, #1            // divide by two: middle row index
+          LSLS    R3, R2, #ROW_SHIFT    // index to offset
+          LDR     R1, [R4, R3]          // read opcode from optable
+          CMP     R0, R1                // compare input to opcode (unsigned)
+          BLO     lower                 // input sorts before the middle row
+          BHI     higher                // input sorts after the middle row
+found:    LSLS    R2, R2, #ROW_SHIFT    // index to offset
+          ADDS    R4, R2                // select row
+          MOVS    R0, #0                // success return code (sets Z)
+          LDRH    R1, [R4, #4]          // get machine code template
+          LDRB    R2, [R4, #6]          // get instruction type
+          LDRB    R3, [R4, #7]          // get special code
+          POP     {R4, R5, R6}
+          BX      LR
+lower:    MOVS    R6, R2                // update end index
+          B       loop
+higher:   ADDS    R5, R2, #1            // update begin index
+          B       loop
+error:    MOVS    R0, #1                // return code 1 (not found, clears Z)
+          POP     {R4, R5, R6}
+          BX      LR
+
+// Row layout: 4 opcode characters, 16-bit template, type byte, special byte.
+// The search compares opcodes as whole words, so rows must stay in ascending
+// order of that word. The order below matches a little-endian load, where the
+// LAST character is the most significant byte ("ASRI" sorts before "ADCS").
+          .balign 4                     // ADR target and LDR both need word alignment
+op_table:
+    .ascii "ADD3"; .hword 0x1C00; .byte 0x01, 0x03 // instr type 1, special code 3 (imm width)
+    .ascii "ADD8"; .hword 0x3000; .byte 0x02, 0x08 // instr type 2, special code 8 (imm width)
+    .ascii "ASPI"; .hword 0xB000; .byte 0x04, 0x07 // instr type 4, special code 7
+    .ascii "ASRI"; .hword 0x1000; .byte 0x01, 0x05 // instr type 1, special code 5 (imm width)
+    .ascii "ADCS"; .hword 0x4140; .byte 0x00, 0x00 // instr type 0, special code 0
+    .ascii "ADDS"; .hword 0x1400; .byte 0x03, 0x00 // instr type 3, special code 0
+    .ascii "BKPT"; .hword 0xBE00; .byte 0x04, 0x08 // instr type 4, special code 8
diff --git a/newasm/parsers/decimal.s b/newasm/parsers/decimal.s
new file mode 100644
index 0000000..a1516d2
--- /dev/null
+++ b/newasm/parsers/decimal.s
@@ -0,0 +1,46 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type decimal, %function
+.global decimal
+
+// Parse an unsigned decimal number at the input cursor.
+// A leading '0' is a complete number on its own: digits after it are left
+// unconsumed. The accumulated value is not checked for 32-bit overflow.
+//
+// in:   R4  input cursor
+// out:  R0  return code, Z flag set exactly when R0 is 0
+//           0 success
+//           1 not a digit
+//       R2  parsed value (on success)
+//       R4  advanced past the consumed digits
+// clobbers: R3
+
+decimal:  LDRB    R0, [R4]              // get a char
+          CMP     R0, '0                // check if zero
+          BNE     notzero
+          ADDS    R4, 1                 // consume the char
+          MOVS    R2, 0                 // return value of zero
+success:  MOVS    R0, #0                // return code zero (success)
+          BX      LR
+notzero:  CMP     R0, '1                // if not [1-9] then error
+          BLO     bad
+          CMP     R0, '9
+          BHI     bad
+          ADDS    R4, 1                 // consume the first digit
+          SUBS    R0, '0                // calculate the value
+          MOVS    R2, R0                // store it in R2
+loop:     LDRB    R0, [R4]              // get another char
+          CMP     R0, '0                // if not [0-9] then done
+          BLO     success
+          CMP     R0, '9
+          BHI     success
+          ADDS    R4, 1                 // consume the additional digit
+          SUBS    R0, '0                // calculate the value
+          MOVS    R3, 10                // base 10
+          MULS    R2, R3                // shift result by one decimal place
+          ADDS    R2, R0                // accumulate into R2
+          B       loop                  // keep getting digits
+bad:      MOVS    R0, #1                // return code 1 (not a digit)
+          BX      LR
diff --git a/newasm/parsers/label.s b/newasm/parsers/label.s
new file mode 100644
index 0000000..4770e6c
--- /dev/null
+++ b/newasm/parsers/label.s
@@ -0,0 +1,30 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type label, %function
+.global label
+
+// 1 unexpected begin char
+// 2 doesn't end with colon
+
+// R4 input buffer
+// R2 output buffer
+
+label:    PUSH    {LR}
+          LDRB    R0, [R4]              // get a char
+          CMP     R0, 0x61              // a
+          BLO     1f
+          CMP     R0, 0x7A              // z
+          BLS     2f
+1:        MOVS    R0, #1                // return code 1 (expected lowercase)
+          POP     {PC}
+2:        BL      symbol
+          LDRB    R0, [R4]              // get a char
+          CMP     R0, ':                // colon
+          BEQ     3f
+          MOVS    R0, #2                // return code 2 (expected colon)
+          POP     {PC}
+3:        ADDS    R4, 1                 // consume the colon
+          MOVS    R0, #0                // return code 0 (success)
+          POP     {PC}
diff --git a/newasm/parsers/opcode.s b/newasm/parsers/opcode.s
new file mode 100644
index 0000000..8ec327d
--- /dev/null
+++ b/newasm/parsers/opcode.s
@@ -0,0 +1,40 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type opcode, %function
+.global opcode
+
+// 1 unexpected first char
+
+// R4 input buffer
+// R2 output buffer
+
+opcode:   PUSH    {LR}
+          LDRB    R0, [R4]              // get a char
+          CMP     R0, 0x41              // A
+          BLO     1f
+          CMP     R0, 0x5A              // Z
+          BLS     2f
+1:        MOVS    R0, #1                // unexpected char
+          POP     {PC}
+2:        ADDS    R4, 1                 // consume the character
+          STRB    R0, [R2]              // store in temp buffer
+          ADDS    R2, 1                 // advance temp buffer pointer
+          LDRB    R0, [R4]              // get another character
+          BL      goodchar              // check if valid symbol char
+          BEQ     2b                    // if so keep getting chars
+          MOVS    R0, #0                // return code success
+          STRB    R0, [R2]              // write null byte
+          POP     {PC}
+
+goodchar: CMP     R0, '0
+          BLO     bad
+          CMP     R0, '9
+          BLS     good
+          CMP     R0, 'A
+          BLO     bad
+          CMP     R0, 'Z
+          BHI     bad
+good:     CMP     R0, R0
+bad:      BX      LR
diff --git a/newasm/parsers/register.s b/newasm/parsers/register.s
new file mode 100644
index 0000000..48ebf7a
--- /dev/null
+++ b/newasm/parsers/register.s
@@ -0,0 +1,62 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type register, %function
+.global register
+
+// Parse a register name at the input cursor: R0-R12, SP, LR or PC.
+//
+// in:   R4  input cursor
+// out:  R0  return code, Z flag set exactly when R0 is 0
+//           0 success
+//           1 unexpected char
+//           2 invalid general-purpose register number
+//       R2  register number 0-15 (on success; overwritten otherwise)
+//       R4  advanced past the consumed characters; not rewound on error
+// clobbers: R1, R3
+
+.equ MAX_GP_REG, 12                     // highest register written as R<n>
+.equ FIRST_SPECIAL, 13                  // number of the first table row (SP)
+.equ TABLE_BYTES, 6                     // size of the two-character name table
+
+register: PUSH    {LR}
+          LDRB    R0, [R4]              // peek at the first char
+r_test:   CMP     R0, 'R
+          BNE     special
+          ADDS    R4, #1                // consume R
+          BL      decimal               // consume a decimal number (result in R2)
+          BEQ     validate
+          POP     {PC}                  // error code already in R0
+validate: CMP     R2, #MAX_GP_REG       // general purpose registers 0-12
+          BLS     success
+          MOVS    R0, #2                // invalid register number error code
+          POP     {PC}
+// The cursor can sit on an odd address and Cortex-M0+ faults on an unaligned
+// halfword load, so the two characters are fetched bytewise and combined.
+special:  LDRB    R0, [R4]              // first char (low byte)
+          LDRB    R1, [R4, #1]          // second char (high byte)
+          LSLS    R1, R1, #8
+          ORRS    R0, R1                // same value LDRH yields for a table row
+          ADR     R2, table             // get address of table
+          MOVS    R3, #0                // set table offset to 0
+loop:     LDRH    R1, [R2, R3]          // get two bytes from table at current offset
+          CMP     R0, R1                // compare input to table row
+          BEQ     done                  // if equal then done
+          ADDS    R3, #2                // increment table offset
+          CMP     R3, #TABLE_BYTES      // compare offset to table size
+          BLO     loop                  // loop until end of table
+          MOVS    R0, #1                // return code 1 (unexpected char)
+          POP     {PC}
+done:     ADDS    R4, #2                // consume two chars
+          LSRS    R3, R3, #1            // divide table offset by two to row
+          ADDS    R3, #FIRST_SPECIAL    // add 13 to get register number
+          MOVS    R2, R3
+success:  MOVS    R0, #0                // return code 0 (success)
+          POP     {PC}
+
+// ADR can only address a word-aligned label, so pin the alignment explicitly.
+          .balign 4
+table:    .ascii "SP"
+          .ascii "LR"
+          .ascii "PC"
diff --git a/newasm/parsers/symbol.s b/newasm/parsers/symbol.s
new file mode 100644
index 0000000..f57d493
--- /dev/null
+++ b/newasm/parsers/symbol.s
@@ -0,0 +1,48 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type symbol, %function
+.global symbol
+
+// 1 unexpected begin char
+
+// R4 input buffer
+// R2 output buffer
+
+symbol:   PUSH    {LR}
+          LDRB    R0, [R4]              // get a char
+          BL      goodchar              // check if valid symbol char
+          BEQ     loop
+          MOVS    R0, #1                // unexpected begin char
+          POP     {PC}
+loop:     ADDS    R4, 1                 // consume the character
+          STRB    R0, [R2]              // store in temp buffer
+          ADDS    R2, 1                 // advance temp buffer pointer
+          LDRB    R0, [R4]              // get another character
+          BL      goodchar              // check if valid symbol char
+          BEQ     loop                  // if so keep getting chars
+          MOVS    R0, #0                // return code success
+          STRB    R0, [R2]              // write null byte
+          POP     {PC}
+
+goodchar: CMP     R0, '$
+          BEQ     good
+          CMP     R0, '.
+          BEQ     good
+          CMP     R0, '0
+          BLO     bad
+          CMP     R0, '9
+          BLS     good
+          CMP     R0, 'A
+          BLO     bad
+          CMP     R0, 'Z
+          BLS     good
+          CMP     R0, '_
+          BEQ     good
+          CMP     R0, 'a
+          BLO     bad
+          CMP     R0, 'z
+          BHI     bad
+good:     CMP     R0, R0
+bad:      BX      LR
diff --git a/newasm/parsers/whitespace.s b/newasm/parsers/whitespace.s
new file mode 100644
index 0000000..708cab3
--- /dev/null
+++ b/newasm/parsers/whitespace.s
@@ -0,0 +1,23 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type whitespace, %function
+.global whitespace
+
+// 1 unexpected char
+
+// R4 input buffer
+
+whitespace:
+          LDRB    R0, [R4]              // get a char
+          CMP     R0, '                 // space
+          BEQ     1f
+          MOVS    R0, #1                // return code 1 (unexpected char)
+          BX      LR
+1:        ADDS    R4, 1                 // consume the character
+          LDRB    R0, [R4]              // get another character
+          CMP     R0, '                 // check if space
+          BEQ     1b                    // if so keep getting chars
+          MOVS    R0, #0                // return code 0 (success)
+          BX      LR
diff --git a/newasm/statement.s b/newasm/statement.s
index 83f8d8d..4a199a6 100644
--- a/newasm/statement.s
+++ b/newasm/statement.s
@@ -7,24 +7,31 @@
 
 // 1 reported an error
 
-// R1 input buffer
+// R4 input buffer
 // R2 output buffer
 
 statement:PUSH    {LR}
           BL      whitespace
+          PUSH    {R2}
           BL      label
+          POP     {R2}
           CMP     R0, #2                // check for error code 2 (no colon)
           BNE     1f
           ADR     R0, no_colon
           B       err_exit
 1:        BL      whitespace
+          PUSH    {R2}
           BL      opcode
+          POP     {R2}
           BEQ     2f
           ADR     R0, no_opcode
           B       err_exit
-2:        MOVS    R0, #0
-          POP     {PC}
-
+2:        LDR     R0, [R2]
+          BL      lookup
+          BEQ     3f
+          ADR     R0, not_found
+          B       err_exit
+3:        POP     {PC}                  // success code already in R0
 err_exit: BL      putstrln
           MOVS    R0, #1                // return code 1 (there was an error)
           POP     {PC}
@@ -33,3 +40,5 @@ err_exit: BL putstrln
 no_colon: .asciz "Error: Expected colon at end of label"
           .align 4
 no_opcode: .asciz "Error: Expected an opcode"
+          .align 4
+not_found: .asciz "Error: Opcode not found"
diff --git a/newasm/string.s b/newasm/string.s
index c77958b..d897a98 100644
--- a/newasm/string.s
+++ b/newasm/string.s
@@ -28,7 +28,8 @@ putstrln:
           BL      putstr
           POP     {PC}
 
-cmpstr:   MOVS    R4, 0
+cmpstr:   PUSH    {R4}
+          MOVS    R4, 0
 1:        LDRB    R2, [R0, R4]
           LDRB    R3, [R1, R4]
           CMP     R2, R3
@@ -37,4 +38,5 @@ cmpstr: MOVS R4, 0
           BEQ     2f
           ADDS    R4, 1
           B       1b
-2:        BX      LR
+2:        POP     {R4}
+          BX      LR
diff --git a/newasm/symbol.s b/newasm/symbol.s
deleted file mode 100644
index 0677f6a..0000000
--- a/newasm/symbol.s
+++ /dev/null
@@ -1,48 +0,0 @@
-.syntax unified
-.cpu cortex-m0plus
-.thumb
-
-.type symbol, %function
-.global symbol
-
-// 1 unexpected begin char
-
-// R1 input buffer
-// R2 output buffer
-
-symbol:   PUSH    {LR}
-          LDRB    R0, [R1]              // get a char
-          BL      goodchar              // check if valid symbol char
-          BEQ     loop
-          MOVS    R0, #1                // unexpected begin char
-          POP     {PC}
-loop:     ADDS    R1, 1                 // consume the character
-          STRB    R0, [R2]              // store in temp buffer
-          ADDS    R2, 1                 // advance temp buffer pointer
-          LDRB    R0, [R1]              // get another character
-          BL      goodchar              // check if valid symbol char
-          BEQ     loop                  // if so keep getting chars
-          MOVS    R0, #0                // return code success
-          STRB    R0, [R2]              // write null byte
-          POP     {PC}
-
-goodchar: CMP     R0, '$
-          BEQ     good
-          CMP     R0, '.
-          BEQ     good
-          CMP     R0, '0
-          BLO     bad
-          CMP     R0, '9
-          BLS     good
-          CMP     R0, 'A
-          BLO     bad
-          CMP     R0, 'Z
-          BLS     good
-          CMP     R0, '_
-          BEQ     good
-          CMP     R0, 'a
-          BLO     bad
-          CMP     R0, 'z
-          BHI     bad
-good:     CMP     R0, R0
-bad:      BX      LR
diff --git a/newasm/whitespace.s b/newasm/whitespace.s
deleted file mode 100644
index f4d8d9f..0000000
--- a/newasm/whitespace.s
+++ /dev/null
@@ -1,21 +0,0 @@
-.syntax unified
-.cpu cortex-m0plus
-.thumb
-
-.type whitespace, %function
-.global whitespace
-
-// 1 unexpected char
-
-whitespace:
-          LDRB    R0, [R1]              // get a char
-          CMP     R0, '                 // space
-          BEQ     1f
-          MOVS    R0, #1                // return code 1 (unexpected char)
-          BX      LR
-1:        ADDS    R1, 1                 // consume the character
-          LDRB    R0, [R1]              // get another character
-          CMP     R0, '                 // check if space
-          BEQ     1b                    // if so keep getting chars
-          MOVS    R0, #0                // return code 0 (success)
-          BX      LR
-- 
cgit v1.2.3