From ee01f1a7f1e10be78bcceb4f0f42aa352a6a588f Mon Sep 17 00:00:00 2001 From: Jacques Comeaux Date: Sat, 24 Aug 2024 01:14:04 -0500 Subject: Add parsers for basic instruction types --- newasm/Makefile | 28 ++++++++++---------- newasm/errors.s | 30 +++++++++++++++++++++ newasm/imm.s | 27 +++++++++++++++++++ newasm/main.s | 45 ++++++++++++++------------------ newasm/optable.s | 9 +++---- newasm/parsers/decimal.s | 4 +-- newasm/parsers/immediate.s | 13 ++++++++++ newasm/parsers/label.s | 5 +--- newasm/parsers/register.s | 13 +++++----- newasm/regimm.s | 37 ++++++++++++++++++++++++++ newasm/regreg.s | 29 +++++++++++++++++++++ newasm/regregimm.s | 47 +++++++++++++++++++++++++++++++++ newasm/regregreg.s | 44 +++++++++++++++++++++++++++++++ newasm/statement.s | 65 +++++++++++++++++++++++++--------------------- 14 files changed, 309 insertions(+), 87 deletions(-) create mode 100644 newasm/errors.s create mode 100644 newasm/imm.s create mode 100644 newasm/parsers/immediate.s create mode 100644 newasm/regimm.s create mode 100644 newasm/regreg.s create mode 100644 newasm/regregimm.s create mode 100644 newasm/regregreg.s (limited to 'newasm') diff --git a/newasm/Makefile b/newasm/Makefile index fd057fe..78a0828 100644 --- a/newasm/Makefile +++ b/newasm/Makefile @@ -2,40 +2,42 @@ PICO = /dev/disk/by-label/RPI-RP2 DEVICE = /dev/ttyUSB0 .PHONY: build -build: parse.he +build: assemble.he -parse.he: parse.bin - od -An -tx2 -v parse.bin | sed "s/^ //" | tr " " "\n" | tr [:lower:] [:upper:] | sed "s/^/0x/" > parse.he +assemble.he: assemble.bin + od -An -tx2 -v assemble.bin | sed "s/^ //" | tr " " "\n" | tr [:lower:] [:upper:] | sed "s/^/0x/" > assemble.he -parse.bin: parse.elf - arm-none-eabi-objcopy -O binary parse.elf parse.bin +assemble.bin: assemble.elf + arm-none-eabi-objcopy -O binary assemble.elf assemble.bin objects = main.o uart.o data.o string.o input.o \ statement.o optable.o \ + regreg.o regregimm.o regimm.o regregreg.o imm.o \ parsers/whitespace.o \ parsers/label.o \ 
parsers/symbol.o \ parsers/opcode.o \ parsers/register.o \ - parsers/decimal.o + parsers/decimal.o \ + parsers/immediate.o -parse.elf: pico_bin.ld $(objects) - arm-none-eabi-ld -T pico_bin.ld -o parse.elf $(objects) +assemble.elf: pico_bin.ld $(objects) + arm-none-eabi-ld -T pico_bin.ld -o assemble.elf $(objects) $(objects): %.o: %.s arm-none-eabi-as -o $@ $< .PHONY: clean clean: - rm -f parse.elf parse.bin parse.he slowcat $(objects) + rm -f assemble.elf assemble.bin assemble.he slowcat $(objects) .PHONY: dump -dump: parse.bin - @od -Ax -tx2 -v parse.bin +dump: assemble.bin + @od -Ax -tx2 -v assemble.bin .PHONY: serial -serial: $(DEVICE) parse.he slowcat - cat parse.he | tr "\n" "\r" | ./slowcat | picocom -b 115200 -q $(DEVICE) +serial: $(DEVICE) assemble.he slowcat + cat assemble.he | tr "\n" "\r" | ./slowcat | picocom -b 115200 -q $(DEVICE) @echo echo -n "G" | picocom -b 115200 -q $(DEVICE) @echo diff --git a/newasm/errors.s b/newasm/errors.s new file mode 100644 index 0000000..3822851 --- /dev/null +++ b/newasm/errors.s @@ -0,0 +1,30 @@ +.syntax unified +.cpu cortex-m0plus +.thumb + +// .type print_error, %function +// .global print_error + +// 00 success +// 01 expected an opcode (or unexpected character) +// 02 expected label or opcode +// 03 expected colon at end of label +// 04 opcode not found + +// 05 expected digit + +// 06 expected register +// 07 invalid general-purpose register number +// 08 invalid register for this register position +// 09 invalid register combo for this instruction + +// 0A immediate value too large + +// 0B extra input at end of statement + + .align 4 +no_colon: .asciz "Error: Expected colon at end of label" + .align 4 +no_opcode: .asciz "Error: Expected an opcode" + .align 4 +not_found: .asciz "Error: Opcode not found" diff --git a/newasm/imm.s b/newasm/imm.s new file mode 100644 index 0000000..f6c7266 --- /dev/null +++ b/newasm/imm.s @@ -0,0 +1,27 @@ +.syntax unified +.cpu cortex-m0plus +.thumb + +.type imm, %function +.global 
imm + +// R1 instruction under construction +// R3 immediate width +// R4 input buffer + +imm: PUSH {LR} + PUSH {R3} // keep the immediate width across the call + BL immediate // parse the immediate value into R2 + POP {R3} + BNE exit // exit if failure + MOVS R0, 1 // R0 = 1 << width, the first value that does not fit + LSLS R0, R3 + CMP R2, R0 + BLO fine // immediate fits in the field + MOVS R0, 0x0A // return code 0A (immediate value too large) + POP {PC} +fine: ORRS R1, R2 // fill in imm + MOVS R0, 0 // return code 0 (success) +exit: POP {PC} +bad_reg: MOVS R0, 8 // unreachable in imm (nothing branches to bad_reg); return code 8 (invalid register for this register position) + POP {PC} diff --git a/newasm/main.s b/newasm/main.s index a27fec9..f1d3c0d 100644 --- a/newasm/main.s +++ b/newasm/main.s @@ -6,38 +6,31 @@ .global main, strbuf main: LDR R5, =0x20002000 - BL uart_recv -loop: BL prompt - LDR R0, inpbuf + BL uart_recv // wait for keypress +loop: BL prompt // display address and data + LDR R0, inpbuf // get a line of input BL getline - LDR R0, inpbuf - BL putstrln - LDR R4, inpbuf - LDR R2, strbuf - MOVS R0, 0 + LDR R4, inpbuf // prepare input buffer + LDR R2, strbuf // prepare output buffer + MOVS R0, 0 // clear output buffer STRB R0, [R2] - BL statement - BNE bad - -good: ADR R0, success - PUSH {R1} - BL putstrln - MOVS R0, R4 - BL putstrln - POP {R0} + BL statement // call statement parser + BNE bad // print message if failure + MOVS R0, R1 // show assembled instruction BL send_hex LDR R0, =crlf BL putstr - B loop - -bad: ADR R0, fail - BL putstrln - MOVS R0, R4 + B loop // repeat +bad: PUSH {R0} // keep the error code across putstr + ADR R0, fail + BL putstr + POP {R0} + BL send_hex // print the error code + LDR R0, =crlf + BL putstr + MOVS R0, R4 // print the input from the current parse position BL putstrln B loop -never: BL uart_recv - LDR R0, =0x20000001 - BX R0 prompt: PUSH {LR} MOVS R0, R5 @@ -57,4 +50,4 @@ strbuf: .word 0x20001F80 .align 4 success: .asciz 
"The parser suceeded" .align 4 -fail: .asciz "The parser failed" +fail: .asciz "The parser failed: " diff --git a/newasm/optable.s b/newasm/optable.s index a063f02..fb80ab4 100644 --- a/newasm/optable.s +++ b/newasm/optable.s @@ -5,9 +5,6 @@ .type lookup, %function .global lookup -// 1 unexpected char -// 2 unexpected end of input - // 
input: // R0 input string (unboxed) @@ -25,7 +22,7 @@ loop: CMP R5, R6 // if begin == end then range = 0 BEQ error ADDS R2, R5, R6 // get sum LSRS R2, 1 // divide by two -no_jiggle:LSLS R3, R2, 3 // times 8 (index to offset) + LSLS R3, R2, 3 // times 8 (index to offset) LDR R1, [R4, R3]// read opcode from optable CMP R0, R1 // compare opcode to input BLO lower // if lower @@ -42,7 +39,7 @@ lower: MOVS R6, R2 // update end pointer B loop higher: ADDS R5, R2, 1 // update begin pointer B loop -error: MOVS R0, 1 // return code 1 (not found) +error: MOVS R0, 4 // return code 4 (opcode not found) POP {R4, R5, R6} BX LR @@ -53,5 +50,5 @@ op_table: .ascii "ASPI"; .hword 0xB000; .byte 0x04, 0x07 // instr type 4, special code 7 .ascii "ASRI"; .hword 0x1000; .byte 0x01, 0x05 // instr type 1, special code 5 (imm width) .ascii "ADCS"; .hword 0x4140; .byte 0x00, 0x00 // instr type 0, special code 0 - .ascii "ADDS"; .hword 0x1400; .byte 0x03, 0x00 // instr type 3, special code 0 + .ascii "ADDS"; .hword 0x1800; .byte 0x03, 0x00 // instr type 3, special code 0 .ascii "BKPT"; .hword 0xBE00; .byte 0x04, 0x08 // instr type 4, special code 8 diff --git a/newasm/parsers/decimal.s b/newasm/parsers/decimal.s index a1516d2..cc9dff4 100644 --- a/newasm/parsers/decimal.s +++ b/newasm/parsers/decimal.s @@ -5,8 +5,6 @@ .type decimal, %function .global decimal -// 1 not a digit - // R4 input stream // R2 output value @@ -35,5 +33,5 @@ loop: LDRB R0, [R4] // get another char MULS R2, R3 // shift result by one decimal place ADDS R2, R0 // accumulate into R2 B loop // keep getting digits -bad: MOVS R0, #1 // return code 1 (not a digit) +bad: MOVS R0, #5 // return code 5 (not a digit) BX LR diff --git a/newasm/parsers/immediate.s b/newasm/parsers/immediate.s new file mode 100644 index 0000000..ac3e578 --- /dev/null +++ b/newasm/parsers/immediate.s @@ -0,0 +1,13 @@ +.syntax unified +.cpu cortex-m0plus +.thumb + +.type immediate, %function +.global immediate + +// R4 input buffer +// R2 output value 
+ +immediate:PUSH {LR} + BL decimal + POP {PC} diff --git a/newasm/parsers/label.s b/newasm/parsers/label.s index 4770e6c..45d39e5 100644 --- a/newasm/parsers/label.s +++ b/newasm/parsers/label.s @@ -5,9 +5,6 @@ .type label, %function .global label -// 1 unexpected begin char -// 2 doesn't end with colon - // R4 input buffer // R2 output buffer @@ -23,7 +20,7 @@ label: PUSH {LR} LDRB R0, [R4] // get a char CMP R0, ': // colon BEQ 3f - MOVS R0, #2 // return code 2 (expected colon) + MOVS R0, #3 // return code 3 (expected colon) POP {PC} 3: ADDS R4, 1 // consume the colon MOVS R0, #0 // return code 0 (success) diff --git a/newasm/parsers/register.s b/newasm/parsers/register.s index 48ebf7a..9334aa3 100644 --- a/newasm/parsers/register.s +++ b/newasm/parsers/register.s @@ -5,9 +5,6 @@ .type register, %function .global register -// 1 unexpected char -// 2 invalid general-purpose register number - // R4 input buffer // R2 output buffer @@ -21,9 +18,12 @@ r_test: CMP R0, 'R POP {PC} // error code already in R0 validate: CMP R2, #12 // general purpose registers 0-12 BLS success - MOVS R0, #2 // invalid register number error code + MOVS R0, #7 // invalid register number error code POP {PC} -special: LDRH R0, [R4] // get two bytes from input stream +special: LDRB R0, [R4] // get two bytes from input stream, one byte at a time + LDRB R1, [R4, 1] // second char (presumably byte loads because R4 may be odd and LDRH needs alignment - confirm) + LSLS R1, 8 // second char goes in the high byte + ORRS R0, R1 // R0 = the two chars as a little-endian halfword ADR R2, table // get address of table MOVS R3, 0 // set table offset to 0 loop: LDRH R1, [R2, R3] // get two bytes from table at current offset @@ -32,7 +32,7 @@ loop: LDRH R1, [R2, R3] // get two bytes from table at current offset ADDS R3, 2 // increment table offset CMP R3, 6 // compare offset to table size BLO loop // loop until end of table - MOVS R0, #1 // return code 1 (unexpected char) + MOVS R0, #6 // return code 6 
(expected register) POP {PC} done: ADDS R4, 2 // consume two chars LSRS R3, 1 // divide table offset by two to row @@ -41,6 +41,7 @@ done: ADDS R4, 2 // consume two chars success: MOVS R0, #0 // return code 
0 (success) POP {PC} + .align 4 table: .ascii "SP" .ascii "LR" .ascii "PC" diff --git a/newasm/regimm.s b/newasm/regimm.s new file mode 100644 index 0000000..3e294c8 --- /dev/null +++ b/newasm/regimm.s @@ -0,0 +1,37 @@ +.syntax unified +.cpu cortex-m0plus +.thumb + +.type regimm, %function +.global regimm + +// R1 instruction under construction +// R3 immediate width +// R4 input buffer + +regimm: PUSH {LR} + PUSH {R3} // keep the immediate width across the call + BL register // parse a register + POP {R3} + BNE exit // exit if failure + CMP R2, 7 // check that it's R0-R7 + BHI bad_reg + LSLS R2, 8 // shift by 8 + ORRS R1, R2 // fill in Rdn + BL whitespace // mandatory whitespace + BNE exit // exit if failure + PUSH {R3} + BL immediate // parse the immediate value into R2 + POP {R3} + BNE exit // exit if failure + MOVS R0, 1 // R0 = 1 << width, the first value that does not fit + LSLS R0, R3 + CMP R2, R0 + BLO fine // immediate fits in the field + MOVS R0, 0x0A // return code 0A (immediate value too large) + POP {PC} +fine: ORRS R1, R2 // fill in imm + MOVS R0, 0 // return code 0 (success) +exit: POP {PC} +bad_reg: MOVS R0, 8 // return code 8 (invalid register for this register position) + POP {PC} diff --git a/newasm/regreg.s b/newasm/regreg.s new file mode 100644 index 0000000..6893dee --- /dev/null +++ b/newasm/regreg.s @@ -0,0 +1,29 @@ +.syntax unified +.cpu cortex-m0plus +.thumb + +.type regreg, %function +.global regreg + +// R1 instruction under construction +// R2 scratch: register number returned by register +// R4 input buffer + +regreg: PUSH {LR} + BL register // parse a register + BNE exit // exit if failure + CMP R2, 7 // check that it's R0-R7 + BHI bad_reg + ORRS R1, R2 // fill in Rdn + BL whitespace // mandatory whitespace + BNE exit // exit if failure + BL register // parse a register + BNE exit + CMP R2, 7 // check that it's R0-R7 + BHI bad_reg + LSLS R2, 3 // shift by 3 + ORRS R1, R2 // fill in Rm + MOVS R0, 0 // return code 0 (success) +exit: POP 
{PC} +bad_reg: MOVS R0, 8 // return code 8 (invalid register for this register position) + POP {PC} diff --git a/newasm/regregimm.s b/newasm/regregimm.s new file mode 100644 index 0000000..6c649e2 --- /dev/null +++ 
b/newasm/regregimm.s @@ -0,0 +1,47 @@ +.syntax unified +.cpu cortex-m0plus +.thumb + +.type regregimm, %function +.global regregimm + +// R1 instruction under construction +// R3 immediate width +// R4 input buffer + +regregimm:PUSH {LR} + PUSH {R3} // keep the immediate width across the call + BL register // parse a register + POP {R3} + BNE exit // exit if failure + CMP R2, 7 // check that it's R0-R7 + BHI bad_reg + ORRS R1, R2 // fill in Rd + BL whitespace // mandatory whitespace + BNE exit // exit if failure + PUSH {R3} + BL register // parse a register + POP {R3} + BNE exit + CMP R2, 7 // check that it's R0-R7 + BHI bad_reg + LSLS R2, 3 // shift by 3 + ORRS R1, R2 // fill in Rn + BL whitespace // mandatory whitespace + BNE exit // exit if failure + PUSH {R3} + BL immediate // parse the immediate value into R2 + POP {R3} + BNE exit // exit if failure + MOVS R0, 1 // R0 = 1 << width, the first value that does not fit + LSLS R0, R3 + CMP R2, R0 + BLO fine // immediate fits in the field + MOVS R0, 0x0A // return code 0A (immediate value too large) + POP {PC} +fine: LSLS R2, 6 // shift by 6 + ORRS R1, R2 // fill in imm + MOVS R0, 0 // return code 0 (success) +exit: POP {PC} +bad_reg: MOVS R0, 8 // return code 8 (invalid register for this register position) + POP {PC} diff --git a/newasm/regregreg.s b/newasm/regregreg.s new file mode 100644 index 0000000..a8be6ee --- /dev/null +++ b/newasm/regregreg.s @@ -0,0 +1,44 @@ +.syntax unified +.cpu cortex-m0plus +.thumb + +.type regregreg, %function +.global regregreg + +// R1 instruction under construction +// R2 scratch: register number returned by register +// R4 input buffer + +regregreg:PUSH {LR} + + // TODO loop it + BL register // parse a register + BNE exit // exit if failure + CMP R2, 7 // check that it's R0-R7 + BHI bad_reg + ORRS R1, R2 // fill in Rd + + BL whitespace // mandatory whitespace + BNE exit // exit if failure + + BL register // parse a register + BNE exit + CMP R2, 7 // check that it's R0-R7 
+ BHI bad_reg + LSLS R2, 3 // shift by 3 + ORRS R1, R2 // fill in Rn + + BL whitespace // mandatory whitespace + BNE exit // exit if failure + + BL register // parse a register + BNE exit + CMP R2, 7 // check that it's R0-R7 + BHI bad_reg + LSLS R2, 6 // shift by 6 + ORRS R1, R2 // fill in Rm + + MOVS R0, 0 // return code 0 (success) +exit: POP {PC} +bad_reg: MOVS R0, 8 // return code 8 (invalid register for this register position) + POP {PC} diff --git a/newasm/statement.s b/newasm/statement.s index 4a199a6..436ae03 100644 --- a/newasm/statement.s +++ b/newasm/statement.s @@ -5,40 +5,47 @@ .type statement, %function .global statement -// 1 reported an error - // R4 input buffer // R2 output buffer statement:PUSH {LR} - BL whitespace - PUSH {R2} + MOVS R3, R2 // save output buffer + BL whitespace // skip leading whitespace BL label - POP {R2} - CMP R0, #2 // check for error code 2 (no colon) - BNE 1f - ADR R0, no_colon - B err_exit + CMP R0, 3 // only exit for code 3 (no colon at end of label) + BEQ exit + MOVS R1, R0 // keep label's return code to combine with opcode's 1: BL whitespace - PUSH {R2} - BL opcode - POP {R2} - BEQ 2f - ADR R0, no_opcode - B err_exit -2: LDR R0, [R2] + MOVS R2, 0 // reset output buffer + STRB R2, [R3] + MOVS R2, R3 // R2 = output buffer again + BL opcode // TODO opcodes longer than 4 + ADD R0, R1 // R0 = opcode code + label code; plain ADD leaves the flags alone, so BNE presumably tests opcode's result - confirm + BNE exit +2: MOVS R0, R3 // echo the output buffer contents + PUSH {R3} + BL putstrln + POP {R3} + LDR R0, [R3] // first 4 bytes of the output buffer are the lookup key BL lookup - BEQ 3f - ADR R0, not_found - B err_exit -3: POP {PC} // success code already in R0 -err_exit: BL putstrln - MOVS R0, #1 // return code 1 (there was an error) - POP {PC} + BNE exit + BL whitespace // TODO new return code for expected whitespace + BNE exit + ADR R0, parsers + LSLS R2, 2 // multiply by 4 to get byte offset + LDR R2, [R0, R2] // get address of 
parser + BLX R2 // call the selected operand parser + BNE exit + BL whitespace // skip trailing whitespace (result not checked) + LDRB R0, [R4] // get a byte from the input stream + TST R0, R0 // check if zero + BEQ exit // if it's zero then success + MOVS R0, 0x0B // return code 0B (extra input at end) +exit: POP {PC} - .align 4 -no_colon: .asciz "Error: Expected colon at end of label" - .align 4 -no_opcode: .asciz "Error: Expected an opcode" - .align 4 -not_found: .asciz "Error: Opcode not found" + .align 4 +parsers: .word regreg // parser table, one word per entry, indexed by R2 + .word regregimm + .word regimm + .word regregreg + 
.word imm -- cgit v1.2.3