From ccc04526f14764d856ad37b1f517308fccf886a6 Mon Sep 17 00:00:00 2001
From: Jacques Comeaux
Date: Wed, 21 Aug 2024 03:18:35 -0500
Subject: Add label and opcode parsers

---
 newasm/Makefile     |  3 ++-
 newasm/input.s      |  6 +++---
 newasm/label.s      | 30 ++++++++++++++++++++++++++++++
 newasm/main.s       | 47 ++++++++++++++++++++++++++++++++++++-----------
 newasm/opcode.s     | 40 ++++++++++++++++++++++++++++++++++++++++
 newasm/statement.s  | 35 +++++++++++++++++++++++++++++++++++
 newasm/string.s     | 10 +++++++++-
 newasm/symbol.s     | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 newasm/whitespace.s | 21 +++++++++++++++++++++
 9 files changed, 224 insertions(+), 16 deletions(-)
 create mode 100644 newasm/label.s
 create mode 100644 newasm/opcode.s
 create mode 100644 newasm/statement.s
 create mode 100644 newasm/symbol.s
 create mode 100644 newasm/whitespace.s

diff --git a/newasm/Makefile b/newasm/Makefile
index 3302630..7b31eb4 100644
--- a/newasm/Makefile
+++ b/newasm/Makefile
@@ -10,7 +10,8 @@ parse.he: parse.bin
 parse.bin: parse.elf
 	arm-none-eabi-objcopy -O binary parse.elf parse.bin
 
-objects = main.o uart.o data.o string.o input.o
+objects = main.o uart.o data.o string.o input.o \
+          statement.o whitespace.o label.o symbol.o opcode.o
 
 parse.elf: pico_bin.ld $(objects)
 	arm-none-eabi-ld -T pico_bin.ld -o parse.elf $(objects)
diff --git a/newasm/input.s b/newasm/input.s
index d9c8c73..f3db1e9 100644
--- a/newasm/input.s
+++ b/newasm/input.s
@@ -7,7 +7,7 @@
 
 
 getline: PUSH    {R4, R5, LR}            // save registers
-        MOVS    R4, R0                  // put strbuf in R4
+        MOVS    R4, R0                  // copy buffer start address
 next:   BL      uart_recv               // get a char
         CMP     R0, 0x03                // end of text (^C)
         BEQ     cancel                  // don't submit, start on next line
@@ -27,7 +27,7 @@ next:   BL      uart_recv               // get a char
         BHI     next
 good:   BL      uart_send               // echo the printable char
         STRB    R0, [R4, R5]            // write the printable char
-        ADDS    R5, 1                   // increment strbuf offset
+        ADDS    R5, 1                   // increment buffer offset
         B       next                    // get another char
 cancel: MOVS    R5, 0                   // reset offset
         STRB    R5, [R4, R5]            // write empty string
@@ -77,7 +77,7 @@ retry:  MOVS    R0, 0x08                // backspace
         SUBS    R2, 1                   // decrement amount
         B       1b                      // repeat
 2:      STRB    R2, [R4, R5]            // terminate string
-        MOVS    R0, R4                  // copy strbuf address
+        MOVS    R0, R4                  // copy buffer start address
         BL      putstr                  // print backspace sequence
         MOVS    R5, 0                   // null byte and reset offset
         STRB    R5, [R4]                // write empty string
diff --git a/newasm/label.s b/newasm/label.s
new file mode 100644
index 0000000..af2f641
--- /dev/null
+++ b/newasm/label.s
@@ -0,0 +1,30 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type label, %function
+.global label
+
+// label: parse "<symbol>:"; R0 = 0 ok, 1 unexpected begin char (not a-z),
+// 2 symbol doesn't end with colon
+
+// R1 input buffer pointer (advanced past whatever was consumed)
+// R2 output buffer pointer (the symbol text is copied there by `symbol`)
+
+label:  PUSH    {LR}
+        LDRB    R0, [R1]                // get a char
+        CMP     R0, 0x61                // a
+        BLO     1f
+        CMP     R0, 0x7A                // z
+        BLS     2f
+1:      MOVS    R0, #1                  // return code 1 (expected lowercase)
+        POP     {PC}
+2:      BL      symbol                  // copy the symbol chars; R1 stops at the first non-symbol char
+        LDRB    R0, [R1]                // get a char
+        CMP     R0, ':                  // colon
+        BEQ     3f
+        MOVS    R0, #2                  // return code 2 (expected colon)
+        POP     {PC}
+3:      ADDS    R1, 1                   // consume the colon
+        MOVS    R0, #0                  // return code 0 (success)
+        POP     {PC}
diff --git a/newasm/main.s b/newasm/main.s
index 7f3c038..57a1b98 100644
--- a/newasm/main.s
+++ b/newasm/main.s
@@ -5,30 +5,55 @@
 .type main, %function
 .global main, strbuf
 
-main:   LDR     R6, =0x20002000
+main:   LDR     R4, =0x20002000         // R4 = address shown (with its contents) by prompt
         BL      uart_recv
 loop:   BL      prompt
-        LDR     R0, strbuf
+        LDR     R0, inpbuf              // R0 = line buffer handed to getline
         BL      getline
+        LDR     R0, inpbuf
+        BL      putstrln                // echo the line that was read
+        LDR     R1, inpbuf              // R1 = parser input pointer
+        LDR     R2, strbuf              // R2 = parser output pointer
+        MOVS    R0, 0
+        STRB    R0, [R2]                // start with an empty output string
+        BL      statement
+        BNE     bad                     // flags are from statement's final MOVS R0 (POP keeps them)
+good:   ADR     R0, success
+        PUSH    {R1}                    // keep the remaining-input pointer across putstrln
+        BL      putstrln
+        POP     {R0}                    // print the input that was not consumed
+        BL      putstrln
         LDR     R0, strbuf
-        BL      putstr
-        LDR     R0, =crlf
-        BL      putstr
+        BL      putstrln                // print the parser output buffer
         B       loop
-        BL      uart_recv
+bad:    ADR     R0, fail
+        PUSH    {R1}                    // NOTE(review): statement's error path already called putstrln; confirm R1 survives it
+        BL      putstrln
+        POP     {R0}                    // print the input that was not consumed
+        BL      putstrln
+        LDR     R0, strbuf
+        BL      putstrln                // print the parser output buffer
+        B       loop
+never:  BL      uart_recv               // unreachable: both paths above branch back to loop
         LDR     R0, =0x20000001
         BX      R0
 
-        .align  4
-strbuf: .word   0x20001F00
-
 prompt: PUSH    {LR}
-        MOVS    R0, R6
+        MOVS    R0, R4                  // print the address held in R4
         BL      send_hex
         MOVS    R0, ' 
         BL      uart_send
-        LDR     R0, [R6]
+        LDR     R0, [R4]                // print the word stored at that address
         BL      send_hex
         MOVS    R0, ' 
         BL      uart_send
         POP     {PC}
+
+        .balign 4                       // word-align for LDR literal (.align 4 would mean 16 bytes on ARM)
+inpbuf: .word   0x20001F00              // TODO getline buffer overflow
+strbuf: .word   0x20001F80
+
+        .balign 4                       // ADR needs a word-aligned target
+success: .asciz "The parser succeeded"
+        .balign 4                       // ADR needs a word-aligned target
+fail:   .asciz  "The parser failed"
diff --git a/newasm/opcode.s b/newasm/opcode.s
new file mode 100644
index 0000000..71a5201
--- /dev/null
+++ b/newasm/opcode.s
@@ -0,0 +1,40 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type opcode, %function
+.global opcode
+
+// opcode: copy an A-Z-initial run of [0-9A-Z]; R0 = 0 ok, 1 unexpected first char
+
+// R1 input buffer pointer (advanced past the copied chars)
+// R2 output buffer pointer (advanced; left pointing at the null terminator)
+
+opcode: PUSH    {LR}
+        LDRB    R0, [R1]                // get a char
+        CMP     R0, 0x41                // A
+        BLO     1f
+        CMP     R0, 0x5A                // Z
+        BLS     2f
+1:      MOVS    R0, #1                  // unexpected char
+        POP     {PC}
+2:      ADDS    R1, 1                   // consume the character
+        STRB    R0, [R2]                // store in temp buffer
+        ADDS    R2, 1                   // advance temp buffer pointer
+        LDRB    R0, [R1]                // get another character
+        BL      goodchar                // check if valid symbol char
+        BEQ     2b                      // if so keep getting chars
+        MOVS    R0, #0                  // return code success
+        STRB    R0, [R2]                // write null byte
+        POP     {PC}
+
+goodchar: CMP   R0, '0                  // returns with Z set iff R0 is 0-9 or A-Z
+        BLO     bad
+        CMP     R0, '9
+        BLS     good
+        CMP     R0, 'A
+        BLO     bad
+        CMP     R0, 'Z
+        BHI     bad
+good:   CMP     R0, R0                  // force Z set
+bad:    BX      LR                      // reached from BLO/BHI with Z clear
diff --git a/newasm/statement.s b/newasm/statement.s
new file mode 100644
index 0000000..83f8d8d
--- /dev/null
+++ b/newasm/statement.s
@@ -0,0 +1,35 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type statement, %function
+.global statement
+
+// statement: parse "[label:] OPCODE"; R0 = 0 ok, 1 an error was reported
+
+// R1 input buffer pointer
+// R2 output buffer pointer
+
+statement:PUSH  {LR}
+        BL      whitespace              // skip leading spaces (result ignored)
+        BL      label
+        CMP     R0, #2                  // check for error code 2 (no colon)
+        BNE     1f                      // codes 0 and 1 both go on to the opcode
+        ADR     R0, no_colon
+        B       err_exit
+1:      BL      whitespace              // skip spaces before the opcode (result ignored)
+        BL      opcode
+        BEQ     2f                      // Z is set by opcode's final MOVS R0, #0
+        ADR     R0, no_opcode
+        B       err_exit
+2:      MOVS    R0, #0                  // return code 0 (success); also sets Z for the caller
+        POP     {PC}
+
+err_exit: BL    putstrln                // print the message whose address is in R0
+        MOVS    R0, #1                  // return code 1 (there was an error)
+        POP     {PC}
+
+        .balign 4                       // ADR needs a word-aligned target (.align 4 = 16 bytes on ARM)
+no_colon: .asciz "Error: Expected colon at end of label"
+        .balign 4                       // ADR needs a word-aligned target
+no_opcode: .asciz "Error: Expected an opcode"
diff --git a/newasm/string.s b/newasm/string.s
index c1dad1c..c77958b 100644
---
a/newasm/string.s
+++ b/newasm/string.s
@@ -3,9 +3,10 @@
 .thumb
 
 .type putstr, %function
+.type putstrln, %function
 .type cmpstr, %function
 
-.global putstr, cmpstr
+.global putstr, putstrln, cmpstr
 
 putstr: LDR     R3, =0x40034000
         MOVS    R2, 0x20
@@ -20,6 +21,13 @@ putstr: LDR     R3, =0x40034000
         B       1b
 2:      BX      LR
 
+putstrln:                               // print the string at R0, then the string at crlf
+        PUSH    {LR}                    // non-leaf: keep the return address
+        BL      putstr                  // print the caller's string
+        LDR     R0, =crlf
+        BL      putstr                  // print the line ending
+        POP     {PC}
+
 cmpstr: MOVS    R4, 0
 1:      LDRB    R2, [R0, R4]
         LDRB    R3, [R1, R4]
diff --git a/newasm/symbol.s b/newasm/symbol.s
new file mode 100644
index 0000000..0677f6a
--- /dev/null
+++ b/newasm/symbol.s
@@ -0,0 +1,48 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type symbol, %function
+.global symbol
+
+// symbol: copy a run of symbol chars; R0 = 0 ok, 1 unexpected begin char
+
+// R1 input buffer pointer (advanced past the copied chars)
+// R2 output buffer pointer (advanced; left pointing at the null terminator)
+
+symbol: PUSH    {LR}
+        LDRB    R0, [R1]                // get a char
+        BL      goodchar                // check if valid symbol char
+        BEQ     loop                    // Z set means the char is acceptable
+        MOVS    R0, #1                  // unexpected begin char
+        POP     {PC}
+loop:   ADDS    R1, 1                   // consume the character
+        STRB    R0, [R2]                // store in temp buffer
+        ADDS    R2, 1                   // advance temp buffer pointer
+        LDRB    R0, [R1]                // get another character
+        BL      goodchar                // check if valid symbol char
+        BEQ     loop                    // if so keep getting chars
+        MOVS    R0, #0                  // return code success
+        STRB    R0, [R2]                // write null byte
+        POP     {PC}
+
+goodchar: CMP   R0, '$                  // returns with Z set iff R0 is $ . _ 0-9 A-Z a-z
+        BEQ     good
+        CMP     R0, '.
+        BEQ     good
+        CMP     R0, '0
+        BLO     bad                     // below '0' (and not $ or .)
+        CMP     R0, '9
+        BLS     good                    // digit
+        CMP     R0, 'A
+        BLO     bad                     // between '9' and 'A'
+        CMP     R0, 'Z
+        BLS     good                    // uppercase letter
+        CMP     R0, '_
+        BEQ     good
+        CMP     R0, 'a
+        BLO     bad                     // between 'Z' and 'a', other than '_'
+        CMP     R0, 'z
+        BHI     bad                     // above 'z'
+good:   CMP     R0, R0                  // force Z set
+bad:    BX      LR                      // reached from BLO/BHI with Z clear
diff --git a/newasm/whitespace.s b/newasm/whitespace.s
new file mode 100644
index 0000000..f4d8d9f
--- /dev/null
+++ b/newasm/whitespace.s
@@ -0,0 +1,21 @@
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.type whitespace, %function
+.global whitespace
+
+// whitespace: skip spaces at R1 (R1 is advanced); R0 = 0 ok, 1 unexpected char
+
+whitespace:
+        LDRB    R0, [R1]                // get a char
+        CMP     R0, '                   // space
+        BEQ     1f
+        MOVS    R0, #1                  // return code 1 (unexpected char)
+        BX      LR                      // leaf routine: LR was never saved
+1:      ADDS    R1, 1                   // consume the character
+        LDRB    R0, [R1]                // get another character
+        CMP     R0, '                   // check if space
+        BEQ     1b                      // if so keep getting chars
+        MOVS    R0, #0                  // return code 0 (success)
+        BX      LR
-- 
cgit v1.2.3