Stmt     → Grp1 reg "," operand |
           Grp2 reg "," reg "," constant |
           Grp3 operand |
           goto operand |
           halt
Grp1     → load | store | add | sub
Grp2     → ifeq | iflt | ifgt
Grp3     → get | put
reg      → ax | bx | cx | dx
operand  → reg | constant | [bx] | constant [bx]
constant → hexdigit constant | hexdigit
hexdigit → 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f
There are some minor semantic details that the program handles (such as disallowing stores into immediate operands). The assembly code for the miniassembler follows:
; ASM.ASM
;
; Mini-assembler: matches one source statement against the patterns
; declared below and prints the hex opcode byte(s) for it.

                .xlist
                include         stdlib.a
                matchfuncs
                includelib      stdlib.lib
                .list

dseg            segment para public 'data'

; Some sample statements to assemble (the main program feeds each of
; these to Assemble in turn):

Str1            byte    "load ax, 0",0
Str2            byte    "load ax, bx",0
Str3            byte    "load ax, ax",0
Str4            byte    "add ax, 15",0
Str5            byte    "sub ax, [bx]",0
Str6            byte    "store bx, [1000]",0
Str7            byte    "load bx, 2000[bx]",0
Str8            byte    "goto 3000",0
Str9            byte    "iflt ax, bx, 100",0
Str10           byte    "halt",0
Str11           byte    "This is illegal",0
Str12           byte    "load ax, store",0
Str13           byte    "store ax, 1000",0
Str14           byte    "ifeq ax, 0, 0",0

; Variables used by the assembler. The matching routines fill these in
; and Assemble merges them into the instruction byte as
; (AsmOpcode shl 5) or (AsmOprnd1 shl 3) or AsmOprnd2.

AsmConst        word    0               ;16-bit constant operand, if any.
AsmOpcode       byte    0               ;Base opcode (0..7).
AsmOprnd1       byte    0               ;Register code or special sub-opcode.
AsmOprnd2       byte    0               ;Register code or addressing mode.

                include stdsets.a       ;Bring in the standard char sets.

; Patterns for the assembler:
; Pattern is (
;               (load|store|add|sub) reg "," operand |
;               (ifeq|iflt|ifgt) reg1 "," reg2 "," const |
;               (get|put) operand |
;               goto operand |
;               halt
;            )
;
; With a few semantic additions (e.g., cannot store to a const).
;
; Each pattern initializer below lists, in order, the match function,
; its parameter, the alternate pattern and the next pattern (see the
; stdlib pattern matching documentation for the field definitions).

; Skip any leading white space, then continue with Grp1.

InstrPat        pattern {spancset,WhiteSpace,Grp1,Grp1}

; Group one: try each of the four mnemonics in turn. Grp2 is listed as
; the alternate and Grp1Oprnds as the continuation.

Grp1            pattern {sl_Match2,Grp1Strs,Grp2,Grp1Oprnds}

Grp1Strs        pattern {TryLoad,,Grp1Store}
Grp1Store       pattern {TryStore,,Grp1Add}
Grp1Add         pattern {TryAdd,,Grp1Sub}
Grp1Sub         pattern {TrySub}

; Patterns for the LOAD, STORE, ADD, and SUB instructions.
LoadPat         pattern {MatchStr,LoadInstr2}
LoadInstr2      byte    "LOAD",0

StorePat        pattern {MatchStr,StoreInstr2}
StoreInstr2     byte    "STORE",0

AddPat          pattern {MatchStr,AddInstr2}
AddInstr2       byte    "ADD",0

SubPat          pattern {MatchStr,SubInstr2}
SubInstr2       byte    "SUB",0

; Note: all mnemonic strings are upper case because Assemble converts
; the source line with strupr before matching.

; Patterns for the group one (LOAD/STORE/ADD/SUB) instruction operands:
;       <ws> reg <ws> "," <ws> operand <ws> <end of string>
; The register code goes to AsmOprnd1; MatchGen fills in AsmOprnd2.

Grp1Oprnds      pattern {spancset,WhiteSpace,Grp1Reg,Grp1Reg}
Grp1Reg         pattern {MatchReg,AsmOprnd1,,Grp1ws2}
Grp1ws2         pattern {spancset,WhiteSpace,Grp1Comma,Grp1Comma}
Grp1Comma       pattern {MatchChar,',',0,Grp1ws3}
Grp1ws3         pattern {spancset,WhiteSpace,Grp1Op2,Grp1Op2}
Grp1Op2         pattern {MatchGen,,,EndOfLine}

; Shared tail: optional white space followed by the end of the string.

EndOfLine       pattern {spancset,WhiteSpace,NullChar,NullChar}
NullChar        pattern {EOS}

; Register form of the general operand (used by MatchGen); the register
; code is stored into AsmOprnd2.

Grp1Op2Reg      pattern {MatchReg,AsmOprnd2}

; Patterns for the group two instructions (IFEQ, IFLT, IFGT):
;       <ws> reg <ws> "," <ws> reg <ws> "," <ws> const <ws> <end of string>

Grp2            pattern {sl_Match2,Grp2Strs,Grp3,Grp2Oprnds}

Grp2Strs        pattern {TryIFEQ,,Grp2IFLT}
Grp2IFLT        pattern {TryIFLT,,Grp2IFGT}
Grp2IFGT        pattern {TryIFGT}

Grp2Oprnds      pattern {spancset,WhiteSpace,Grp2Reg,Grp2Reg}
Grp2Reg         pattern {MatchReg,AsmOprnd1,,Grp2ws2}
Grp2ws2         pattern {spancset,WhiteSpace,Grp2Comma,Grp2Comma}
Grp2Comma       pattern {MatchChar,',',0,Grp2ws3}
Grp2ws3         pattern {spancset,WhiteSpace,Grp2Reg2,Grp2Reg2}
Grp2Reg2        pattern {MatchReg,AsmOprnd2,,Grp2ws4}
Grp2ws4         pattern {spancset,WhiteSpace,Grp2Comma2,Grp2Comma2}
Grp2Comma2      pattern {MatchChar,',',0,Grp2ws5}
Grp2ws5         pattern {spancset,WhiteSpace,Grp2Op3,Grp2Op3}
Grp2Op3         pattern {ConstPat,,,EndOfLine}

; Patterns for the IFEQ, IFLT, and IFGT instructions.

IFEQPat         pattern {MatchStr,IFEQInstr2}
IFEQInstr2      byte    "IFEQ",0

IFLTPat         pattern {MatchStr,IFLTInstr2}
IFLTInstr2      byte    "IFLT",0

IFGTPat         pattern {MatchStr,IFGTInstr2}
IFGTInstr2      byte    "IFGT",0

; Grp3 Patterns (GET, PUT and GOTO share the single-operand form):

Grp3            pattern {sl_Match2,Grp3Strs,Grp4,Grp3Oprnds}

Grp3Strs        pattern {TryGet,,Grp3Put}
Grp3Put         pattern {TryPut,,Grp3Goto}
Grp3Goto        pattern {TryGOTO}

; Patterns for the GET and PUT instructions.
GetPat          pattern {MatchStr,GetInstr2}
GetInstr2       byte    "GET",0

PutPat          pattern {MatchStr,PutInstr2}
PutInstr2       byte    "PUT",0

GOTOPat         pattern {MatchStr,GOTOInstr2}
GOTOInstr2      byte    "GOTO",0

; Patterns for the group three (PUT/GET/GOTO) instruction operands:

Grp3Oprnds      pattern {spancset,WhiteSpace,Grp3Op,Grp3Op}
Grp3Op          pattern {MatchGen,,,EndOfLine}

; Patterns for the group four instruction (HALT).

Grp4            pattern {TryHalt,,,EndOfLine}

HaltPat         pattern {MatchStr,HaltInstr2}
HaltInstr2      byte    "HALT",0

; Patterns to match the four non-register addressing modes:

BXIndrctPat     pattern {MatchStr,BXIndrctStr}          ;"[BX]"
BXIndrctStr     byte    "[BX]",0

BXIndexedPat    pattern {ConstPat,,,BXIndrctPat}        ;"xxxx[BX]"

DirectPat       pattern {MatchChar,'[',,DP2}            ;"[xxxx]"
DP2             pattern {ConstPat,,,DP3}
DP3             pattern {MatchChar,']'}

ImmediatePat    pattern {ConstPat}                      ;"xxxx"

; Pattern to match a hex constant:

HexConstPat     pattern {spancset,xdigits}

dseg            ends

cseg            segment para public 'code'
                assume  cs:cseg, ds:dseg

; The store macro tweaks the DS register and stores into the
; specified variable in DSEG.
;
; Only MOV, PUSH and POP are used, none of which modify the flags, so
; the carry flag produced by a preceding match survives the macro.
; AX is loaded with the segment of Where before the store takes place,
; so What must not be AX (or depend on the caller's DS).

store           macro   Where, What
                push    ds
                push    ax
                mov     ax, seg Where
                mov     ds, ax
                mov     Where, What
                pop     ax
                pop     ds
                endm

; Pattern matching routines for the assembler.
; Each mnemonic has its own corresponding matching function that
; attempts to match the mnemonic. If it does, it initializes the
; AsmOpcode variable with the base opcode of the instruction.
;
; Every routine has the same shape: save DX:SI, point DX:SI at the
; mnemonic's pattern with ldxi, call match2, and store the opcode only
; when match2 reports success (carry set). The carry from match2 is
; returned unchanged to the caller.

; Compare against the "LOAD" string.

TryLoad         proc    far
                push    dx
                push    si
                ldxi    LoadPat
                match2
                jnc     NoTLMatch

                store   AsmOpcode, 0            ;Initialize base opcode.

NoTLMatch:      pop     si
                pop     dx
                ret
TryLoad         endp

; Compare against the "STORE" string.

TryStore        proc    far
                push    dx
                push    si
                ldxi    StorePat
                match2
                jnc     NoTSMatch

                store   AsmOpcode, 1            ;Initialize base opcode.

NoTSMatch:      pop     si
                pop     dx
                ret
TryStore        endp

; Compare against the "ADD" string.

TryAdd          proc    far
                push    dx
                push    si
                ldxi    AddPat
                match2
                jnc     NoTAMatch

                store   AsmOpcode, 2            ;Initialize ADD opcode.

NoTAMatch:      pop     si
                pop     dx
                ret
TryAdd          endp

; Compare against the "SUB" string.
TrySub          proc    far
                push    dx
                push    si
                ldxi    SubPat
                match2
                jnc     NoTMMatch

                store   AsmOpcode, 3            ;Initialize SUB opcode.

NoTMMatch:      pop     si
                pop     dx
                ret
TrySub          endp

; Compare against the "IFEQ" string.

TryIFEQ         proc    far
                push    dx
                push    si
                ldxi    IFEQPat
                match2
                jnc     NoIEMatch

                store   AsmOpcode, 4            ;Initialize IFEQ opcode.

NoIEMatch:      pop     si
                pop     dx
                ret
TryIFEQ         endp

; Compare against the "IFLT" string.

TryIFLT         proc    far
                push    dx
                push    si
                ldxi    IFLTPat
                match2
                jnc     NoILMatch

                store   AsmOpcode, 5            ;Initialize IFLT opcode.

NoILMatch:      pop     si
                pop     dx
                ret
TryIFLT         endp

; Compare against the "IFGT" string.

TryIFGT         proc    far
                push    dx
                push    si
                ldxi    IFGTPat
                match2
                jnc     NoIGMatch

                store   AsmOpcode, 6            ;Initialize IFGT opcode.

NoIGMatch:      pop     si
                pop     dx
                ret
TryIFGT         endp

; The remaining instructions all share base opcode 7 (the "special"
; group); AsmOprnd1 selects the particular instruction:
;       0 = HALT, 1 = GOTO, 2 = GET, 3 = PUT.

; Compare against the "GET" string.

TryGet          proc    far
                push    dx
                push    si
                ldxi    GetPat
                match2
                jnc     NoGMatch

                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 2            ;GET's Special opcode.

NoGMatch:       pop     si
                pop     dx
                ret
TryGet          endp

; Compare against the "PUT" string.

TryPut          proc    far
                push    dx
                push    si
                ldxi    PutPat
                match2
                jnc     NoPMatch

                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 3            ;PUT's Special opcode.

NoPMatch:       pop     si
                pop     dx
                ret
TryPut          endp

; Compare against the "GOTO" string.

TryGOTO         proc    far
                push    dx
                push    si
                ldxi    GOTOPat
                match2
                jnc     NoGotoMatch

                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 1            ;GOTO's Special opcode.

NoGotoMatch:    pop     si
                pop     dx
                ret
TryGOTO         endp

; Compare against the "HALT" string. HALT takes no operand, so the
; second operand field is cleared here as well.

TryHalt         proc    far
                push    dx
                push    si
                ldxi    HaltPat
                match2
                jnc     NoHMatch

                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 0            ;Halt's special opcode.
                store   AsmOprnd2, 0

NoHMatch:       pop     si
                pop     dx
                ret
TryHalt         endp

; MatchReg checks to see if we've got a valid register value.
; On entry,
; DS:SI points at the location to store the byte opcode (0, 1, 2, or 3) for
; a reasonable register (AX, BX, CX, or DX); ES:DI points at the string
; containing (hopefully) the register operand, and CX points at the last
; location plus one we can check in the string.
;
; On return, Carry=1 for success, 0 for failure. ES:AX must point beyond
; the characters which make up the register if we have a match.
; On failure AX is returned equal to DI and nothing is stored at DS:SI.

MatchReg        proc    far

; Be sure both characters of the register name lie inside the string
; before examining them or storing anything.

                lea     ax, 2[di]               ;AX = just past the register.
                cmp     ax, cx                  ;Be sure we didn't go
                ja      BadReg                  ; too far.

; ES:DI Points at two characters which should be AX/BX/CX/DX. Anything
; else is an error.

                cmp     byte ptr es:1[di], 'X'  ;Everyone needs this
                jne     BadReg
                xor     ax, ax                  ;886 "AX" reg code.
                cmp     byte ptr es:[di], 'A'   ;AX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'B'   ;BX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'C'   ;CX?
                je      GoodReg
                inc     ax
                cmp     byte ptr es:[di], 'D'   ;DX?
                je      GoodReg

BadReg:         clc
                mov     ax, di
                ret

GoodReg:        mov     ds:[si], al             ;Save register opcode.
                lea     ax, 2[di]               ;Skip past register.
                stc                             ;LEA leaves the flags alone.
                ret
MatchReg        endp

; MatchGen-     Matches a general addressing mode. Stuffs the appropriate
;               addressing mode code into AsmOprnd2. If a 16-bit constant
;               is required by this addressing mode, this code shoves that
;               into the AsmConst variable.
;
;               AsmOprnd2 codes: 0..3 = register (stored by MatchReg),
;               4 = [bx], 5 = xxxx[bx], 6 = [xxxx], 7 = xxxx (immediate).
;               The forms are tried in that order; the carry flag from
;               the last match2 call is returned to the caller.

MatchGen        proc    far
                push    dx
                push    si

; Try a register operand.

                ldxi    Grp1Op2Reg
                match2
                jc      MGDone

; Try "[bx]".

                ldxi    BXIndrctPat
                match2
                jnc     TryBXIndexed
                store   AsmOprnd2, 4
                jmp     MGDone

; Look for an operand of the form "xxxx[bx]".

TryBXIndexed:   ldxi    BXIndexedPat
                match2
                jnc     TryDirect
                store   AsmOprnd2, 5
                jmp     MGDone

; Try a direct address operand "[xxxx]".

TryDirect:      ldxi    DirectPat
                match2
                jnc     TryImmediate
                store   AsmOprnd2, 6
                jmp     MGDone

; Look for an immediate operand "xxxx".

TryImmediate:   ldxi    ImmediatePat
                match2
                jnc     MGDone
                store   AsmOprnd2, 7

MGDone:         pop     si
                pop     dx
                ret
MatchGen        endp

; ConstPat-     Matches a 16-bit hex constant. If it matches, it converts
;               the string to an integer and stores it into AsmConst.
ConstPat        proc    far
                push    dx
                push    si
                ldxi    HexConstPat
                match2
                jnc     CPDone

; Matched. Convert the digits with atoh and save the value. The store
; macro cannot be used here because it loads AX with a segment value
; before performing the move, and the value to store is in AX.
; AX (as returned by match2) is saved and restored around the conversion.

                push    ds
                push    ax
                mov     ax, seg AsmConst
                mov     ds, ax
                atoh
                mov     AsmConst, ax
                pop     ax
                pop     ds
                stc                             ;Report success.

CPDone:         pop     si
                pop     dx
                ret
ConstPat        endp

; Assemble-     This code assembles the instruction that ES:DI points
;               at and displays the hex opcode(s) for that instruction.
;
;               The source string is converted to upper case in place
;               (strupr) before it is echoed and matched. The variables
;               in DSEG are referenced directly, so DS is expected to
;               address DSEG on entry (Main sets this up).

Assemble        proc    near

; Print out the instruction we're about to assemble.

                print
                byte    "Assembling: ",0
                strupr
                puts
                putcr

; Assemble the instruction:

                ldxi    InstrPat
                xor     cx, cx
                match
                jnc     SyntaxError

; Quick check for illegal instructions:
;   STORE and GET may not use an immediate operand;
;   GOTO must use an immediate operand.

                cmp     AsmOpcode, 7            ;Special/Get instr.
                jne     TryStoreInstr
                cmp     AsmOprnd1, 2            ;GET opcode
                je      SeeIfImm
                cmp     AsmOprnd1, 1            ;Goto opcode
                je      IsGOTO

TryStoreInstr:  cmp     AsmOpcode, 1            ;Store Instruction
                jne     InstrOkay

SeeIfImm:       cmp     AsmOprnd2, 7            ;Immediate Adrs Mode
                jne     InstrOkay
                print
                db      "Syntax error: store/get immediate not allowed."
                db      " Try Again",cr,lf,0
                jmp     ASMDone

IsGOTO:         cmp     AsmOprnd2, 7            ;Immediate mode for GOTO
                je      InstrOkay
                print
                db      "Syntax error: GOTO only allows immediate "
                byte    "mode.",cr,lf
                db      0
                jmp     ASMDone

; Merge the opcode and operand fields together in the instruction byte,
; then output the opcode byte. The result is
;       (AsmOpcode shl 5) or (AsmOprnd1 shl 3) or AsmOprnd2.

InstrOkay:      mov     al, AsmOpcode
                shl     al, 1
                shl     al, 1
                or      al, AsmOprnd1
                shl     al, 1
                shl     al, 1
                shl     al, 1
                or      al, AsmOprnd2
                puth

; The IFxx instructions (opcodes 4..6) always carry a constant; the
; others carry one only for addressing modes 5, 6 and 7.

                cmp     AsmOpcode, 4            ;IFEQ instruction
                jb      SimpleInstr
                cmp     AsmOpcode, 6            ;IFGT instruction
                jbe     PutConstant

SimpleInstr:    cmp     AsmOprnd2, 5
                jb      ASMDone

; If this instruction has a 16 bit operand, output it here
; (low byte first, then high byte).

PutConstant:    mov     al, ' '
                putc
                mov     ax, AsmConst
                puth                            ;Low byte.
                mov     al, ' '
                putc
                xchg    al, ah                  ;AL = high byte of constant.
                puth
                jmp     ASMDone

SyntaxError:    print
                db      "Syntax error in instruction."
                db      cr,lf,0

ASMDone:        putcr
                ret
Assemble        endp

; Main program that tests the assembler.
Main            proc
                mov     ax, seg dseg            ;Set up the segment registers
                mov     ds, ax
                mov     es, ax
                meminit

; Run every sample statement through the assembler, in order. The
; repeat block expands to one "lesi StrN / call Assemble" pair per name.

                irp     Sample, <Str1,Str2,Str3,Str4,Str5,Str6,Str7>
                lesi    Sample
                call    Assemble
                endm

                irp     Sample, <Str8,Str9,Str10,Str11,Str12,Str13,Str14>
                lesi    Sample
                call    Assemble
                endm

Quit:           ExitPgm
Main            endp

cseg            ends

; Program stack: 256 copies of the six-byte string "stack ".

sseg            segment para stack 'stack'
stk             db      256 dup ("stack ")
sseg            ends

; Trailing segment holding 16 uninitialized bytes.
; NOTE(review): presumably required by the library's memory
; initialization (meminit) -- confirm against the stdlib documentation.

zzzzzzseg       segment para public 'zzzzzz'
LastBytes       db      16 dup (?)
zzzzzzseg       ends
                end     Main
Sample Output:
Assembling: LOAD AX, 0
07 00 00
Assembling: LOAD AX, BX
01
Assembling: LOAD AX, AX
00
Assembling: ADD AX, 15
47 15 00
Assembling: SUB AX, [BX]
64
Assembling: STORE BX, [1000]
2E 00 10
Assembling: LOAD BX, 2000[BX]
0D 00 20
Assembling: GOTO 3000
EF 00 30
Assembling: IFLT AX, BX, 100
A1 00 01
Assembling: HALT
E0
Assembling: THIS IS ILLEGAL
Syntax error in instruction.
Assembling: LOAD AX, STORE
Syntax error in instruction.
Assembling: STORE AX, 1000
Syntax error: store/get immediate not allowed. Try Again
Assembling: IFEQ AX, 0, 0
Syntax error in instruction.