From 80d8f3ae48255f786bd4d52a1819ea0c339f6946 Mon Sep 17 00:00:00 2001
From: Jacques Comeaux
Date: Fri, 23 Aug 2024 19:46:17 -0500
Subject: Add register parser and binary search for opcodes

---
 newasm/parsers/decimal.s    | 39 ++++++++++++++++++++++++++++++++++++
 newasm/parsers/label.s      | 30 ++++++++++++++++++++++++++++++
 newasm/parsers/opcode.s     | 40 +++++++++++++++++++++++++++++++++++++
 newasm/parsers/register.s   | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 newasm/parsers/symbol.s     | 48 ++++++++++++++++++++++++++++++++++++++++++++
 newasm/parsers/whitespace.s | 23 +++++++++++++++++++++
 6 files changed, 235 insertions(+)
 create mode 100644 newasm/parsers/decimal.s
 create mode 100644 newasm/parsers/label.s
 create mode 100644 newasm/parsers/opcode.s
 create mode 100644 newasm/parsers/register.s
 create mode 100644 newasm/parsers/symbol.s
 create mode 100644 newasm/parsers/whitespace.s

(limited to 'newasm/parsers')

diff --git a/newasm/parsers/decimal.s b/newasm/parsers/decimal.s
new file mode 100644
index 0000000..a1516d2
--- /dev/null
+++ b/newasm/parsers/decimal.s
@@ -0,0 +1,39 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type decimal, %function
+.global decimal
+
+// returns R0: 0 success, 1 not a digit (Z flag is set exactly when R0 is 0)
+
+// R4 input stream (advanced past the digits consumed)
+// R2 output value (no overflow check); R3 is clobbered when a second digit is read
+
+decimal:    LDRB R0, [R4]           // get a char
+            CMP R0, '0              // a leading zero is a complete number on its own
+            BNE notzero
+            ADDS R4, 1              // consume the char
+            MOVS R2, 0              // return value of zero
+success:    MOVS R0, #0             // return code zero (success); also sets Z for callers
+            BX LR
+notzero:    CMP R0, '1              // if not [1-9] then error
+            BLO bad
+            CMP R0, '9
+            BHI bad
+            ADDS R4, 1              // consume the first digit
+            SUBS R0, '0             // calculate the value
+            MOVS R2, R0             // store it in R2
+loop:       LDRB R0, [R4]           // get another char
+            CMP R0, '0              // if not [0-9] then done
+            BLO success
+            CMP R0, '9
+            BHI success
+            ADDS R4, 1              // consume the additional digit
+            SUBS R0, '0             // calculate the value
+            MOVS R3, 10             // base 10
+            MULS R2, R3             // shift result by one decimal place
+            ADDS R2, R0             // accumulate into R2
+            B loop                  // keep getting digits
+bad:        MOVS R0, #1             // return code 1 (not a digit); clears Z
+            BX LR
diff --git a/newasm/parsers/label.s b/newasm/parsers/label.s
new file mode 100644
index 0000000..4770e6c
--- /dev/null
+++ b/newasm/parsers/label.s
@@ -0,0 +1,30 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type label, %function
+.global label
+
+// returns R0: 0 success, 1 unexpected begin char (expected a-z)
+//             2 doesn't end with colon
+
+// R4 input buffer (advanced past whatever was consumed)
+// R2 output buffer (symbol stores the NUL-terminated name through it)
+
+label:      PUSH {LR}
+            LDRB R0, [R4]           // get a char
+            CMP R0, 0x61            // a
+            BLO 1f
+            CMP R0, 0x7A            // z
+            BLS 2f
+1:          MOVS R0, #1             // return code 1 (expected lowercase)
+            POP {PC}
+2:          BL symbol               // result ignored: first char is a-z, which symbol accepts
+            LDRB R0, [R4]           // get the char that ended the symbol
+            CMP R0, ':              // colon
+            BEQ 3f
+            MOVS R0, #2             // return code 2 (expected colon)
+            POP {PC}
+3:          ADDS R4, 1              // consume the colon
+            MOVS R0, #0             // return code 0 (success)
+            POP {PC}
diff --git a/newasm/parsers/opcode.s b/newasm/parsers/opcode.s
new file mode 100644
index 0000000..8ec327d
--- /dev/null
+++ b/newasm/parsers/opcode.s
@@ -0,0 +1,40 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type opcode, %function
+.global opcode
+
+// returns R0: 0 success, 1 unexpected first char (expected A-Z)
+
+// R4 input buffer (advanced past the chars consumed)
+// R2 output buffer (receives the text plus a NUL; left pointing at the NUL)
+
+opcode:     PUSH {LR}
+            LDRB R0, [R4]           // get a char
+            CMP R0, 0x41            // A
+            BLO 1f
+            CMP R0, 0x5A            // Z
+            BLS 2f
+1:          MOVS R0, #1             // unexpected char
+            POP {PC}
+2:          ADDS R4, 1              // consume the character
+            STRB R0, [R2]           // store in temp buffer
+            ADDS R2, 1              // advance temp buffer pointer
+            LDRB R0, [R4]           // get another character
+            BL goodchar             // Z set if char is [0-9A-Z]
+            BEQ 2b                  // if so keep getting chars
+            MOVS R0, #0             // return code success
+            STRB R0, [R2]           // write null byte
+            POP {PC}
+
+goodchar:   CMP R0, '0              // returns Z set for [0-9A-Z], Z clear otherwise
+            BLO bad
+            CMP R0, '9
+            BLS good
+            CMP R0, 'A
+            BLO bad
+            CMP R0, 'Z
+            BHI bad
+good:       CMP R0, R0              // force Z set
+bad:        BX LR                   // reached from a failed range compare, so Z is clear
diff --git a/newasm/parsers/register.s b/newasm/parsers/register.s
new file mode 100644
index 0000000..48ebf7a
--- /dev/null
+++ b/newasm/parsers/register.s
@@ -0,0 +1,55 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type register, %function
+.global register
+
+// register: parse R0-R12, SP, LR or PC from the input stream
+//
+// returns R0: 0 success
+//             1 unexpected char
+//             2 invalid general-purpose register number
+
+// R4 input buffer (advanced past whatever was consumed)
+// R2 output: register number 0-15 (meaningful only when R0 is 0)
+// clobbers R1, R3 and the flags
+
+register:   PUSH {LR}
+            LDRB R0, [R4]
+r_test:     CMP R0, 'R
+            BNE special
+            ADDS R4, 1              // consume R
+            BL decimal              // consume a decimal number (result in R2)
+            BEQ validate            // decimal leaves Z set on success
+            POP {PC}                // error code already in R0
+validate:   CMP R2, #12             // general purpose registers 0-12
+            BLS success
+            MOVS R0, #2             // invalid register number error code
+            POP {PC}
+// The stream pointer can be odd and Cortex-M0+ faults on an unaligned LDRH, so use byte loads.
+special:    LDRB R0, [R4]           // first char -> bits 0-7
+            LDRB R1, [R4, #1]       // second char
+            LSLS R1, R1, #8         // second char -> bits 8-15
+            ORRS R0, R1             // same value a little-endian halfword load would give
+            ADR R2, table           // get address of table
+            MOVS R3, 0              // set table offset to 0
+loop:       LDRH R1, [R2, R3]       // get two bytes from table at current offset
+            CMP R0, R1              // compare input to table row
+            BEQ done                // if equal then done
+            ADDS R3, 2              // increment table offset
+            CMP R3, 6               // compare offset to table size
+            BLO loop                // loop until end of table
+            MOVS R0, #1             // return code 1 (unexpected char)
+            POP {PC}
+done:       ADDS R4, 2              // consume two chars
+            LSRS R3, 1              // divide table offset by two to row
+            ADDS R3, 13             // add 13 to get register number
+            MOVS R2, R3
+success:    MOVS R0, #0             // return code 0 (success)
+            POP {PC}
+
+            .balign 4               // ADR needs a word-aligned target; also keeps LDRH rows aligned
+table:      .ascii "SP"
+            .ascii "LR"
+            .ascii "PC"
diff --git a/newasm/parsers/symbol.s b/newasm/parsers/symbol.s
new file mode 100644
index 0000000..f57d493
--- /dev/null
+++ b/newasm/parsers/symbol.s
@@ -0,0 +1,48 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type symbol, %function
+.global symbol
+
+// returns R0: 0 success, 1 unexpected begin char
+
+// R4 input buffer (advanced past the chars consumed)
+// R2 output buffer (receives the text plus a NUL; left pointing at the NUL)
+
+symbol:     PUSH {LR}
+            LDRB R0, [R4]           // get a char
+            BL goodchar             // check if valid symbol char
+            BEQ loop
+            MOVS R0, #1             // unexpected begin char
+            POP {PC}
+loop:       ADDS R4, 1              // consume the character
+            STRB R0, [R2]           // store in temp buffer
+            ADDS R2, 1              // advance temp buffer pointer
+            LDRB R0, [R4]           // get another character
+            BL goodchar             // check if valid symbol char
+            BEQ loop                // if so keep getting chars
+            MOVS R0, #0             // return code success
+            STRB R0, [R2]           // write null byte
+            POP {PC}
+
+goodchar:   CMP R0, '$              // returns Z set for [$.0-9A-Z_a-z], Z clear otherwise
+            BEQ good
+            CMP R0, '.
+            BEQ good
+            CMP R0, '0
+            BLO bad
+            CMP R0, '9
+            BLS good
+            CMP R0, 'A
+            BLO bad
+            CMP R0, 'Z
+            BLS good
+            CMP R0, '_
+            BEQ good
+            CMP R0, 'a
+            BLO bad
+            CMP R0, 'z
+            BHI bad
+good:       CMP R0, R0              // force Z set
+bad:        BX LR                   // reached from a failed range compare, so Z is clear
diff --git a/newasm/parsers/whitespace.s b/newasm/parsers/whitespace.s
new file mode 100644
index 0000000..708cab3
--- /dev/null
+++ b/newasm/parsers/whitespace.s
@@ -0,0 +1,23 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type whitespace, %function
+.global whitespace
+
+// returns R0: 0 success (one or more spaces consumed), 1 unexpected char
+
+// R4 input buffer (advanced past the spaces consumed)
+
+whitespace:
+            LDRB R0, [R4]           // get a char
+            CMP R0, 0x20            // space (hex so the literal survives whitespace stripping)
+            BEQ 1f
+            MOVS R0, #1             // return code 1 (unexpected char)
+            BX LR
+1:          ADDS R4, 1              // consume the character
+            LDRB R0, [R4]           // get another character
+            CMP R0, 0x20            // check if space
+            BEQ 1b                  // if so keep getting chars
+            MOVS R0, #0             // return code 0 (success)
+            BX LR
-- 
cgit v1.2.3