[Project Log] Python on the 6502/C64, 8080, 6800, 6809 and AVR

Bill · August 21, 2020, 8:14pm

Double Dabble for the Motorola 68000:

.                                 00113 ******************************************************************************
.                                 00114 *
.                                 00115 * Format - Convert a number to ASCII decimal
.                                 00116 *
.                                 00117 * Input:
.                                 00118 *       The first bytes of Dabble = the number to convert
.                                 00119 *       D1 = number of bits to convert
.                                 00120 *       D2 = number of bytes to convert
.                                 00121 *       D3 = number of packed BCD bytes in result
.                                 00122 *
.                                 00123 * Output:
.                                 00124 *       Output string with length byte starting at Out+1
.                                 00125 *
.                                 00126 * Uses:
.                                 00127 *       D0
.                                 00128 *       D4
.                                 00129 *       D5
.                                 00130 *       A0
.                                 00131 *       A1
.                                 00132 *       A2
.                                 00133 *
.                                 00134 * Note:
.                                 00135 *       Implemented with the Double Dabble algorithm:
.                                 00136 *
.                                 00137 *         https://en.wikipedia.org/wiki/Double_dabble
.                                 00138 *
.00000530                         00139 Format:
.00000530 207C 0000076A           00140         movea.l #Dabble,A0      ; Clear the BCD digits
.00000536 D0C2                    00141         adda.w  D2,A0
.00000538 1A03                    00142         move.b  D3,D5
.                                 00143
.0000053A                         00144 Format0:
.0000053A 4218                    00145         clr.b   (A0)+
.0000053C 5305                    00146         subq.b  #1,D5
.0000053E 66FA  (0000053A)        00147         bne     Format0
.                                 00148
.00000540 207C 00000769           00149         movea.l #Dabble-1,A0    ; Determine index of number to convert
.00000546 D0C2                    00150         adda.w  D2,A0           ; This is the LSB
.                                 00151
.00000548 2248                    00152         move.l  A0,A1           ; Determine index of BCD digits
.0000054A D2C3                    00153         adda.w  D3,A1           ; BCD digits are big endian order
.                                 00154                                 ; Index := Num number bytes + Num BCD bytes - 1
.                                 00155
.0000054C                         00156 Format1:
.0000054C 2449                    00157         move.l  A1,A2           ; Index to BCD digits
.0000054E 1A03                    00158         move.b  D3,D5           ; Number of bytes in converted BCD
.                                 00159
.00000550                         00160 Format2:
.00000550 1012                    00161         move.b  (A2),D0         ; Isolate lower nybble
.00000552 0240 000F               00162         andi    #$F,D0
.00000556 0C00 0005               00163         cmpi.b  #4+1,D0         ; If greater than 4, add 3
.0000055A 6500 0004  (00000560)   00164         blo     FormatLoNotGT4
.                                 00165
.0000055E 5600                    00166         addq.b  #3,D0
.                                 00167
.00000560                         00168 FormatLoNotGT4:
.00000560 1812                    00169         move.b  (A2),D4         ; Isolate upper nybble
.00000562 0244 00F0               00170         andi    #$F0,D4
.00000566 0C04 0050               00171         cmpi.b  #$40+$10,D4     ; If greater than 4, add 3
.0000056A 6500 0006  (00000572)   00172         blo     FormatHiNotGT4
.                                 00173
.0000056E 0604 0030               00174         addi.b  #$30,D4
.                                 00175
.00000572                         00176 FormatHiNotGT4:
.00000572 8004                    00177         or.b    D4,D0           ; Combine nybbles
.00000574 1480                    00178         move.b  D0,(A2)
.00000576 538A                    00179         subq.l  #1,A2
.00000578 5305                    00180         subq.b  #1,D5           ; More BCD digits to check?
.0000057A 66D4  (00000550)        00181         bne     Format2
.                                 00182
.0000057C 1012                    00183         move.b  (A2),D0         ; Shift the number left one bit
.0000057E E300                    00184         asl.b   #1,D0
.00000580 E314                    00185         roxl.b  #1,D4           ; Save X flag
.00000582 1480                    00186         move.b  D0,(A2)
.00000584 1A02                    00187         move.b  D2,D5
.00000586 5305                    00188         subq.b  #1,D5
.00000588 6700 0010  (0000059A)   00189         beq     Format3         ; No additional bytes
.                                 00190
.0000058C                         00191 FormatShiftNumber:
.0000058C 1022                    00192         move.b  -(A2),D0        ; Shift the rest of the number
.0000058E E20C                    00193         lsr.b   #1,D4           ; Recover X flag
.00000590 E310                    00194         roxl.b  #1,D0
.00000592 E314                    00195         roxl.b  #1,D4           ; Save X flag
.00000594 1480                    00196         move.b  D0,(A2)
.00000596 5305                    00197         subq.b  #1,D5
.00000598 66F2  (0000058C)        00198         bne     FormatShiftNumber
.                                 00199
.0000059A                         00200 Format3:
.0000059A 2449                    00201         movea.l A1,A2           ; Shift the BCD digits left one bit
.0000059C 1A03                    00202         move.b  D3,D5
.                                 00203
.0000059E                         00204 FormatShiftBCD:
.0000059E 1012                    00205         move.b  (A2),D0
.000005A0 E20C                    00206         lsr.b   #1,D4           ; Recover X flag
.000005A2 E310                    00207         roxl.b  #1,D0
.000005A4 E314                    00208         roxl.b  #1,D4           ; Save X flag
.000005A6 1480                    00209         move.b  D0,(A2)
.000005A8 538A                    00210         subq.l  #1,A2
.000005AA 5305                    00211         subq.b  #1,D5
.000005AC 66F0  (0000059E)        00212         bne     FormatShiftBCD
.                                 00213
.000005AE 5301                    00214         subq.b  #1,D1           ; More bits to process?
.000005B0 669A  (0000054C)        00215         bne     Format1
.                                 00216
.000005B2 227C 00000767           00217         movea.l #Out+1,A1       ; Address the output string
.000005B8 5288                    00218         addq.l  #1,A0           ; Address MSB of BCD digits
.000005BA 4204                    00219         clr.b   D4              ; Clear output digit count
.                                 00220
.000005BC                         00221 Format4:
.000005BC 1010                    00222         move.b  (A0),D0         ; Isolate upper digit
.000005BE 0200 00F0               00223         andi.b  #$F0,D0
.000005C2 6600 0008  (000005CC)   00224         bne     FormatEmitHi
.                                 00225
.000005C6 4A04                    00226         tst.b   D4              ; Leading zero?
.000005C8 6700 000E  (000005D8)   00227         beq     FormatSkipHi
.                                 00228
.000005CC                         00229 FormatEmitHi:
.000005CC E808                    00230         lsr.b   #4,D0           ; Shift into lower nybble
.                                 00231
.000005CE 0600 0030               00232         addi.b  #'0',D0         ; Convert to ASCII numeral
.000005D2 5289                    00233         addq.l  #1,A1
.000005D4 1280                    00234         move.b  D0,(A1)
.000005D6 5204                    00235         addq.b  #1,D4
.                                 00236
.000005D8                         00237 FormatSkipHi:
.000005D8 1018                    00238         move.b  (A0)+,D0        ; Isolate lower digit
.000005DA 0200 000F               00239         andi.b  #$F,D0
.000005DE 6600 0008  (000005E8)   00240         bne     FormatEmitLo
.                                 00241
.000005E2 4A04                    00242         tst.b   D4              ; Leading zero?
.000005E4 6700 000C  (000005F2)   00243         beq     FormatSkipLo
.                                 00244
.000005E8                         00245 FormatEmitLo:
.000005E8 0600 0030               00246         addi.b  #'0',D0         ; Convert to ASCII numeral
.000005EC 5289                    00247         addq.l  #1,A1
.000005EE 1280                    00248         move.b  D0,(A1)
.000005F0 5204                    00249         addq.b  #1,D4
.                                 00250
.000005F2                         00251 FormatSkipLo:
.000005F2 5303                    00252         subq.b  #1,D3           ; More digits?
.000005F4 66C6  (000005BC)        00253         bne     Format4
.                                 00254
.000005F6 11C4 0767               00255         move.b  D4,Out+1        ; Store length of result
.000005FA 6600 0012  (0000060E)   00256         bne     FormatDone      ; Check for all 0's
.                                 00257
.000005FE 103C 0001               00258         move.b  #1,D0           ; Default to "0"
.00000602 11C0 0767               00259         move.b  D0,Out+1
.00000606 103C 0030               00260         move.b  #'0',D0
.0000060A 11C0 0768               00261         move.b  D0,Out+1+1
.                                 00262
.0000060E                         00263 FormatDone:
.0000060E 4E75                    00264         rts

And for the Atmel AVR:

.                         00296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.                         00297 ;
.                         00298 ; Format - Convert a number to ASCII decimal
.                         00299 ;
.                         00300 ; Input:
.                         00301 ;       The first bytes of Dabble = the number to convert
.                         00302 ;       R16 = number of bytes to convert
.                         00303 ;       R17 = number of bits to convert
.                         00304 ;       R18 = number of packed BCD bytes in result
.                         00305 ;
.                         00306 ; Output:
.                         00307 ;       Output string with length byte starting at Out+1
.                         00308 ;
.                         00309 ; Uses:
.                         00310 ;       R0
.                         00311 ;       R19
.                         00312 ;       R20
.                         00313 ;       R21
.                         00314 ;       R22
.                         00315 ;       R23
.                         00316 ;       R28
.                         00317 ;       R29
.                         00318 ;       R30
.                         00319 ;       R31
.                         00320 ;
.                         00321 ; Note:
.                         00322 ;       Implemented with the Double Dabble algorithm:
.                         00323 ;
.                         00324 ;         https://en.wikipedia.org/wiki/Double_dabble
.                         00325 ;
.0000ED                   00326 Format:
.0000ED E0EB          [1] 00327         ldi     R30,low(Dabble) ; Clear the BCD digits
.0000EE E0F1          [1] 00328         ldi     R31,high(Dabble)
.0000EF 2400          [1] 00329         clr     R0
.0000F0 0FE1          [1] 00330         add     R30,R17
.0000F1 1DF0          [1] 00331         adc     R31,R0
.0000F2 2F72          [1] 00332         mov     R23,R18
.                         00333
.0000F3                   00334 Format0:
.0000F3 9201          [2] 00335         st      Z+,R0
.0000F4 957A          [1] 00336         dec     R23
.0000F5 F7E9=0000F3 [1/2] 00337         brne    Format0
.                         00338
.0000F6 2F62          [1] 00339         mov     R22,R18         ; Determine address of BCD digits + 1
.0000F7 0F61          [1] 00340         add     R22,R17         ; BCD digits are big endian order
.0000F8 E04B          [1] 00341         ldi     R20,low(Dabble) ; Index := Num number bytes + Num BCD bytes
.0000F9 E051          [1] 00342         ldi     R21,high(Dabble)
.0000FA 0F46          [1] 00343         add     R20,R22
.0000FB 1D50          [1] 00344         adc     R21,R0
.                         00345
.0000FC                   00346 Format1:
.0000FC 2F32          [1] 00347         mov     R19,R18         ; Number of bytes in converted BCD
.                         00348
.0000FD 2FE4          [1] 00349         mov     R30,R20         ; Address lowest BCD byte
.0000FE 2FF5          [1] 00350         mov     R31,R21
.                         00351
.0000FF                   00352 Format2:
.0000FF 9162          [3] 00353         ld      R22,-Z          ; Isolate lower nybble
.000100 2F76          [1] 00354         mov     R23,R22
.000101 706F          [1] 00355         andi    R22,$F
.000102 3065          [1] 00356         cpi     R22,4+1         ; If greater than 4, add 3
.000103 F008=000105 [1/2] 00357         brlo    FormatLoNotGT4
.                         00358
.000104 5F6D          [1] 00359         subi    R22,$100-3      ; Add 3
.                         00360
.000105                   00361 FormatLoNotGT4:
.000105 7F70          [1] 00362         andi    R23,$F0         ; Isolate upper nybble
.000106 3570          [1] 00363         cpi     R23,$40+$10     ; If greater than 4, add 3
.000107 F008=000109 [1/2] 00364         brlo    FormatHiNotGT4
.                         00365
.000108 5D70          [1] 00366         subi    R23,$100-$30    ; Add $30
.                         00367
.000109                   00368 FormatHiNotGT4:
.000109 2B67          [1] 00369         or      R22,R23         ; Combine nybbles
.00010A 8360          [1] 00370         st      Z,R22
.                         00371
.00010B 953A          [1] 00372         dec     R19             ; More BCD digits to check?
.00010C F791=0000FF [1/2] 00373         brne    Format2
.                         00374
.00010D 9160 010B     [2] 00375         lds     R22,Dabble      ; Shift the number left one bit
.00010F 0F66          [1] 00376         add     R22,R22
.000110 9360 010B     [2] 00377         sts     Dabble,R22
.                         00378
.000112 E0EC          [1] 00379         ldi     R30,low(Dabble+1)
.000113 E0F1          [1] 00380         ldi     R31,high(Dabble+1)
.000114 2F31          [1] 00381         mov     R19,R17
.000115 953A          [1] 00382         dec     R19
.000116 F029=00011C [1/2] 00383         breq    Format3         ; No additional bytes
.                         00384
.000117                   00385 FormatShiftNumber:
.000117 8160          [1] 00386         ld      R22,Z           ; Shift the rest of the number
.000118 1F66          [1] 00387         rol     R22
.000119 9361          [2] 00388         st      Z+,R22
.00011A 953A          [1] 00389         dec     R19
.00011B F7D9=000117 [1/2] 00390         brne    FormatShiftNumber
.                         00391
.00011C                   00392 Format3:
.00011C 2F32          [1] 00393         mov     R19,R18         ; Shift the BCD digits left one bit
.00011D 2FE4          [1] 00394         mov     R30,R20
.00011E 2FF5          [1] 00395         mov     R31,R21
.                         00396
.00011F                   00397 FormatShiftBCD:
.00011F 9162          [3] 00398         ld      R22,-Z
.000120 1F66          [1] 00399         rol     R22
.000121 8360          [1] 00400         st      Z,R22
.000122 953A          [1] 00401         dec     R19
.000123 F7D9=00011F [1/2] 00402         brne    FormatShiftBCD
.                         00403
.000124 950A          [1] 00404         dec     R16             ; More bits to process?
.000125 F6B1=0000FC [1/2] 00405         brne    Format1
.                         00406
.000126 E0EB          [1] 00407         ldi     R30,low(Dabble) ; Address the packed BCD digits
.000127 E0F1          [1] 00408         ldi     R31,high(Dabble)
.000128 0FE1          [1] 00409         add     R30,R17
.000129 1DF0          [1] 00410         adc     R31,R0
.                         00411
.00012A E0CA          [1] 00412         ldi     R28,low(Out+1+1)        ; Address the output buffer
.00012B E0D1          [1] 00413         ldi     R29,high(Out+1+1)
.                         00414
.00012C                   00415 Format4:
.00012C 9161          [2] 00416         ld      R22,Z+          ; Isolate upper digit
.00012D 2F76          [1] 00417         mov     R23,R22
.00012E 7F60          [1] 00418         andi    R22,$F0
.00012F F411=000132 [1/2] 00419         brne    FormatEmitHi
.                         00420
.000130 2300          [1] 00421         tst     R16             ; Leading zero?
.000131 F021=000136 [1/2] 00422         breq    FormatSkipHi
.                         00423
.000132                   00424 FormatEmitHi:
.000132 9562          [1] 00425         swap    R22             ; Shift into lower nybble
.                         00426
.000133 5D60          [1] 00427         subi    R22,$100-'0'    ; Convert to ASCII numeral
.000134 9369          [2] 00428         st      Y+,R22
.                         00429
.000135 9503          [1] 00430         inc     R16
.                         00431
.000136                   00432 FormatSkipHi:
.000136 707F          [1] 00433         andi    R23,$F          ; Isolate lower digit
.000137 F411=00013A [1/2] 00434         brne    FormatEmitLo
.                         00435
.000138 2300          [1] 00436         tst     R16             ; Leading zero?
.000139 F019=00013D [1/2] 00437         breq    FormatSkipLo
.                         00438
.00013A                   00439 FormatEmitLo:
.00013A 5D70          [1] 00440         subi    R23,$100-'0'    ; Convert to ASCII numeral
.00013B 9379          [2] 00441         st      Y+,R23
.                         00442
.00013C 9503          [1] 00443         inc     R16
.                         00444
.00013D                   00445 FormatSkipLo:
.00013D 952A          [1] 00446         dec     R18             ; More digits?
.00013E F769=00012C [1/2] 00447         brne    Format4
.                         00448
.00013F 2300          [1] 00449         tst     R16             ; Store length of result
.000140 F421=000145 [1/2] 00450         brne    FormatDone      ; Check for all 0's
.                         00451
.000141 E360          [1] 00452         ldi     R22,'0'         ; Default to "0"
.000142 9360 010A     [2] 00453         sts     Out+1+1,R22
.                         00454
.000144 9503          [1] 00455         inc     R16
.                         00456
.000145                   00457 FormatDone:
.000145 9300 0109     [2] 00458         sts     Out+1,R16

Bill · August 26, 2020, 12:21pm

Early last year, I reported on some of my experiments with code generation. It is now being put to use in my Pascal compiler for the 6502.

This is a sample statement showing a straightforward translation:

                          00039 ; W0 := W0 + 1;
                          00040
                          00041 ;   ;  0 := v W0 -> 1
                          00042 ;   ;  1 L r 2
                          00043
                          00044 ;      ;  2 L v W0 -> 3
                          00045 ;      ;  3 + c 1
                          00046
                          00047
                          00048 ;  2 L v W0 -> 3
                          00049 ;  3 + c 1
 0034 18              [2] 00050          clc
 0035 A5 0D           [3] 00051          lda    W0
 0037 69 01           [2] 00052          adc    #1
 0039 AA              [2] 00053          tax
 003A A5 0E           [3] 00054          lda    W0+1
 003C 69 00           [2] 00055          adc    #0
                          00056 ;  1 L r 2
                          00057 ;  0 := v W0 -> 1
 003E 86 0D           [3] 00058          stx    W0
 0040 85 0E           [3] 00059          sta    W0+1

and a much better result when optimization is enabled:

                          00031 ; W0 := W0 + 1;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v W0 -> 3
                          00037 ;      ;  3 + c 1
                          00038
                          00039
                          00040 ;  2 L v W0 -> 3
                          00041 ;  3 + c 1
 0029 E6 0D           [5] 00042          inc    W0
 002B D0 02 (002F)  [2/3] 00043          bne    2f
 002D E6 0E           [5] 00044          inc    W0+1
 002F                     00045 2:
                          00046 ;  1 L r 2
                          00047 ;  0 := v W0 -> 1

Surprisingly, an addition of a constant less than 256 can also benefit from a similar transformation:

                          00039 ; W0 := W0 + 2;
                          00040
                          00041 ;   ;  0 := v W0 -> 1
                          00042 ;   ;  1 L r 2
                          00043
                          00044 ;      ;  2 L v W0 -> 3
                          00045 ;      ;  3 + c 2
                          00046
                          00047
                          00048 ;  2 L v W0 -> 3
                          00049 ;  3 + c 2
 0034 18              [2] 00050          clc
 0035 A5 0D           [3] 00051          lda    W0
 0037 69 02           [2] 00052          adc    #2
 0039 AA              [2] 00053          tax
 003A A5 0E           [3] 00054          lda    W0+1
 003C 69 00           [2] 00055          adc    #0
                          00056 ;  1 L r 2
                          00057 ;  0 := v W0 -> 1
 003E 86 0D           [3] 00058          stx    W0
 0040 85 0E           [3] 00059          sta    W0+1

and the better result:

                          00031 ; W0 := W0 + 2;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v W0 -> 3
                          00037 ;      ;  3 + c 2
                          00038
                          00039
                          00040 ;  2 L v W0 -> 3
                          00041 ;  3 + c 2
 0029 18              [2] 00042          clc
 002A A5 0D           [3] 00043          lda    W0
 002C 69 02           [2] 00044          adc    #2
 002E 85 0D           [3] 00045          sta    W0
 0030 90 02 (0034)  [2/3] 00046          bcc    2f
 0032 E6 0E           [5] 00047          inc    W0+1
 0034                     00048 2:
                          00049 ;  1 L r 2
                          00050 ;  0 := v W0 -> 1

The strange comments are diagnostics showing the data structures used by the code generator.

Bill · August 27, 2020, 8:03pm

From the Kill Two Birds with One Stone department, the load of a signed number which has to be done anyway, is also used for sign extension, saving two bytes and three cycles.

This code to add a signed byte to a two-byte number:

                          00031 ; W0 := S1 + W2;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v S1 -> 3
                          00037 ;      ;  3 + v W2
                          00038
                          00039
                          00040 ;  2 L v S1 -> 3
                          00041 ;  3 + v W2
 0029 18              [2] 00042          clc
 002A A5 16           [3] 00043          lda    S1
 002C 65 11           [3] 00044          adc    W2
 002E AA              [2] 00045          tax
 002F A5 16           [3] 00046          lda    S1
 0031 09 7F           [2] 00047          ora    #$7F
 0033 30 02 (0037)  [2/3] 00048          bmi    2f
 0035 A9 00           [2] 00049          lda    #0
 0037                     00050 2:
 0037 65 12           [3] 00051          adc    W2+1
                          00052 ;  1 L r 2
                          00053 ;  0 := v W0 -> 1
 0039 86 0D           [3] 00054          stx    W0
 003B 85 0E           [3] 00055          sta    W0+1

becomes:

 003D 18              [2] 00057          clc
 003E A0 00           [2] 00058          ldy    #0
 0040 A5 16           [3] 00059          lda    S1        ; Kill two birds with one stone
 0042 10 01 (0045)  [2/3] 00060          bpl    2f
 0044 88              [2] 00061          dey
 0045                     00062 2:
 0045 65 11           [3] 00063          adc    W2
 0047 AA              [2] 00064          tax
 0048 98              [2] 00065          tya
 0049 65 12           [3] 00066          adc    W2+1
 004B 86 0D           [3] 00067          stx    W0
 004D 85 0E           [3] 00068          sta    W0+1

And this code to add two signed bytes:

                          00031 ; W0 := S1 + S2;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v S1 -> 3
                          00037 ;      ;  3 + v S2
                          00038
                          00039
                          00040 ;  2 L v S1 -> 3
                          00041 ;  3 + v S2
 0029 18              [2] 00042          clc
 002A A5 16           [3] 00043          lda    S1
 002C 65 17           [3] 00044          adc    S2
 002E AA              [2] 00045          tax
 002F A0 00           [2] 00046          ldy    #0
 0031 24 16           [3] 00047          bit    S1
 0033 10 01 (0036)  [2/3] 00048          bpl    2f
 0035 88              [2] 00049          dey
 0036                     00050 2:
 0036 24 17           [3] 00051          bit    S2
 0038 10 01 (003B)  [2/3] 00052          bpl    2f
 003A 88              [2] 00053          dey
 003B                     00054 2:
 003B 98              [2] 00055          tya
 003C 69 00           [2] 00056          adc    #0
                          00057 ;  1 L r 2
                          00058 ;  0 := v W0 -> 1
 003E 86 0D           [3] 00059          stx    W0
 0040 85 0E           [3] 00060          sta    W0+1

becomes:

 0042 18              [2] 00063          clc
 0043 A0 00           [2] 00064          ldy    #0
 0045 A5 16           [3] 00065          lda    S1        ; Kill two birds with one stone
 0047 10 01 (004A)  [2/3] 00066          bpl    2f
 0049 88              [2] 00067          dey
 004A                     00068 2:
 004A 65 17           [3] 00069          adc    S2
 004C AA              [2] 00070          tax
 004D 24 17           [3] 00071          bit    S2
 004F 10 01 (0052)  [2/3] 00072          bpl    2f
 0051 88              [2] 00073          dey
 0052                     00074 2:
 0052 98              [2] 00075          tya
 0053 69 00           [2] 00076          adc    #0
 0055 86 0D           [3] 00077          stx    W0
 0057 85 0E           [3] 00078          sta    W0+1

Edit: the branch in the first example should be bpl, not bmi.

Bill · August 29, 2020, 4:05pm

More code generation, this time adding to a value already in registers:

                          00031 ; B0 := B1 + B2 + 2;
                          00032
                          00033 ;   ;  0 := v B0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v B1 -> 3
                          00037 ;      ;  3 + v B2 -> 4
                          00038 ;      ;  4 + c 2
                          00039
                          00040
                          00041 ;  1 L r 2
                          00042 ;  2 L v B1 -> 3
                          00043 ;  3 + v B2 -> 4
 0029 18              [2] 00044          clc
 002A A5 1A           [3] 00045          lda    B1
 002C 65 1B           [3] 00046          adc    B2
                          00047 ;  4 + c 2
 002E 18              [2] 00048          clc
 002F 69 02           [2] 00049          adc    #2
                          00050 ;  0 := v B0 -> 1
 0031 85 19           [3] 00051          sta    B0

It knows when not to bother to add:

                          00031 ; B0 := B1 + B2 + 512;
                          00032
                          00033 ;   ;  0 := v B0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v B1 -> 3
                          00037 ;      ;  3 + v B2 -> 4
                          00038 ;      ;  4 + c 512
                          00039
                          00040
                          00041 ;  1 L r 2
                          00042 ;  2 L v B1 -> 3
                          00043 ;  3 + v B2 -> 4
 0029 18              [2] 00044          clc
 002A A5 1A           [3] 00045          lda    B1
 002C 65 1B           [3] 00046          adc    B2
                          00047 ;  4 + c 512
                          00048 ;  0 := v B0 -> 1
 002E 85 19           [3] 00049          sta    B0

Likewise for two-byte values:

                          00031 ; W0 := W1 + W2 + 513;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v W1 -> 3
                          00037 ;      ;  3 + v W2 -> 4
                          00038 ;      ;  4 + c 513
                          00039
                          00040
                          00041 ;  1 L r 2
                          00042 ;  2 L v W1 -> 3
                          00043 ;  3 + v W2 -> 4
 0029 18              [2] 00044          clc
 002A A5 0F           [3] 00045          lda    W1
 002C 65 11           [3] 00046          adc    W2
 002E AA              [2] 00047          tax
 002F A5 10           [3] 00048          lda    W1+1
 0031 65 12           [3] 00049          adc    W2+1
                          00050 ;  4 + c 513
 0033 18              [2] 00051          clc
 0034 A8              [2] 00052          tay
 0035 8A              [2] 00053          txa
 0036 69 01           [2] 00054          adc    #1
 0038 AA              [2] 00055          tax
 0039 98              [2] 00056          tya
 003A 69 02           [2] 00057          adc    #2
                          00058 ;  0 := v W0 -> 1
 003C 86 0D           [3] 00059          stx    W0
 003E 85 0E           [3] 00060          sta    W0+1

This is a major win:

                          00031 ; W0 := W1 + W2 + 512;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v W1 -> 3
                          00037 ;      ;  3 + v W2 -> 4
                          00038 ;      ;  4 + c 512
                          00039
                          00040
                          00041 ;  1 L r 2
                          00042 ;  2 L v W1 -> 3
                          00043 ;  3 + v W2 -> 4
 0029 18              [2] 00044          clc
 002A A5 0F           [3] 00045          lda    W1
 002C 65 11           [3] 00046          adc    W2
 002E AA              [2] 00047          tax
 002F A5 10           [3] 00048          lda    W1+1
 0031 65 12           [3] 00049          adc    W2+1
                          00050 ;  4 + c 512
 0033 18              [2] 00051          clc
 0034 69 02           [2] 00052          adc    #2
                          00053 ;  0 := v W0 -> 1
 0036 86 0D           [3] 00054          stx    W0
 0038 85 0E           [3] 00055          sta    W0+1

This is ugly, but it has to be done. Can it be improved?

                          00031 ; W0 := W1 + W2 + S0;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v W1 -> 3
                          00037 ;      ;  3 + v W2 -> 4
                          00038 ;      ;  4 + v S0
                          00039
                          00040
                          00041 ;  1 L r 2
                          00042 ;  2 L v W1 -> 3
                          00043 ;  3 + v W2 -> 4
 0029 18              [2] 00044          clc
 002A A5 0F           [3] 00045          lda    W1
 002C 65 11           [3] 00046          adc    W2
 002E AA              [2] 00047          tax
 002F A5 10           [3] 00048          lda    W1+1
 0031 65 12           [3] 00049          adc    W2+1
                          00050 ;  4 + v S0
 0033 18              [2] 00051          clc
 0034 85 25           [3] 00052          sta    BTp00_
 0036 8A              [2] 00053          txa
 0037 65 15           [3] 00054          adc    S0
 0039 AA              [2] 00055          tax
 003A A5 15           [3] 00056          lda    S0
 003C 09 7F           [2] 00057          ora    #$7F
 003E 30 02 (0042)  [2/3] 00058          bmi    2f
 0040 A9 00           [2] 00059          lda    #0
 0042                     00060 2:
 0042 65 25           [3] 00061          adc    BTp00_
                          00062 ;  0 := v W0 -> 1
 0044 86 0D           [3] 00063          stx    W0
 0046 85 0E           [3] 00064          sta    W0+1

Bill · August 29, 2020, 11:39pm

Yes, indeed, here is an improvement. Any more?

                          00031 ; W0 := W1 + W2 + S0;
                          00032
                          00033 ;   ;  0 := v W0 -> 1
                          00034 ;   ;  1 L r 2
                          00035
                          00036 ;      ;  2 L v W1 -> 3
                          00037 ;      ;  3 + v W2 -> 4
                          00038 ;      ;  4 + v S0
                          00039
                          00040
                          00041 ;  1 L r 2
                          00042 ;  2 L v W1 -> 3
                          00043 ;  3 + v W2 -> 4
 0029 18              [2] 00044          clc
 002A A5 0F           [3] 00045          lda    W1
 002C 65 11           [3] 00046          adc    W2
 002E AA              [2] 00047          tax
 002F A5 10           [3] 00048          lda    W1+1
 0031 65 12           [3] 00049          adc    W2+1
                          00050 ;  4 + v S0
 0033 18              [2] 00051          clc
 0034 A8              [2] 00052          tay
 0035 8A              [2] 00053          txa
 0036 65 15           [3] 00054          adc    S0
 0038 AA              [2] 00055          tax
 0039 24 15           [3] 00056          bit    S0
 003B 10 01 (003E)  [2/3] 00057          bpl    2f
 003D 88              [2] 00058          dey
 003E                     00059 2:
 003E 98              [2] 00060          tya
 003F 69 00           [2] 00061          adc    #0
                          00062 ;  0 := v W0 -> 1
 0041 86 0D           [3] 00063          stx    W0
 0043 85 0E           [3] 00064          sta    W0+1

Bill · September 3, 2020, 12:22pm

Many instruction sets do not facilitate the writing of position independent code.

8080 machine code is not inherently relocatable, but the MOVCPM command allows the location of the operating system to be changed.

How do they do this?

Embedded in MOVCPM is a bit map of the bytes in the system image indicating which need to be adjusted. Their method only allows movement in increments of 256 bytes.

How do they create this table?

One way is to build the system twice, ORGed a multiple of 256 bytes apart. The bytes which differ are the ones dependent upon the load location.

I want to build a similar system but one allowing an arbitrary relocation offset. If an image is built at one byte and 256 byte offsets, two bit maps can be built to specify which bytes need to be adjusted with the low and high bytes of the load offset.

Those bytes which differ in the off-by-one image but not in the off-by-256 one are to be adjusted with the low byte of the relocation offset.

Comments?

 0200                     00001          org    $200
                          00002
 0200                     00003 Start
 0200 4C 02FF         [3] 00004          jmp    Target
                          00005
 0203 FF                  00006          fcb    <Target
 0204 02FF                00007          fdb    Target
 0206 02                  00008          fcb    >Target
                          00009
 0207                     00010          rmb    255-3-2-1-1
                          00011
 02FF                     00012 Target

 0201                     00001          org    $201
                          00002
 0201                     00003 Start
 0201 4C 0300         [3] 00004          jmp    Target
                          00005
 0204 00                  00006          fcb    <Target
 0205 0300                00007          fdb    Target
 0207 03                  00008          fcb    >Target
                          00009
 0208                     00010          rmb    255-3-2-1-1
                          00011
 0300                     00012 Target

 0300                     00001          org    $300
                          00002
 0300                     00003 Start
 0300 4C 03FF         [3] 00004          jmp    Target
                          00005
 0303 FF                  00006          fcb    <Target
 0304 03FF                00007          fdb    Target
 0306 03                  00008          fcb    >Target
                          00009
 0307                     00010          rmb    255-3-2-1-1
                          00011
 03FF                     00012 Target

Bill · September 4, 2020, 12:11pm

It turns out that my original analysis was flawed.

Consider this:

    fcb     >Address

The information is not available at load time whether an adjustment to the missing low byte of the address will carry into the high byte.

So I will restrict relocation to multiples of 256.

For reference, this is the “resident” portion of the EXEC command; it reads and executes commands from a text file.

.0B03                     00220 DoExec
.0B03 A2 C2           [2] 00221          ldx    #<ExecFCB  ; Get name of file to execute
.0B05 A9 0B           [2] 00222          lda    #>ExecFCB
.0B07 20 022D         [6] 00223          jsr    GETFIL    ; Get File Specification
.0B0A B0 4B (0B57)  [2/3] 00224          bcs    Error     ; Branch if error
.                         00225
.0B0C A0 01           [2] 00226          ldy    #ExtTXT   ; Default to .TXT
.0B0E A2 C2           [2] 00227          ldx    #<ExecFCB
.0B10 A9 0B           [2] 00228          lda    #>ExecFCB
.0B12 20 0233         [6] 00229          jsr    SETEXT    ; Set Extension
.                         00230
.0B15 A9 01           [2] 00231          lda    #FMSO4R   ; Open file for reading
.0B17 8D 0BC2         [4] 00232          sta    ExecFCB+FCBFun
.0B1A A2 C2           [2] 00233          ldx    #<ExecFCB
.0B1C A9 0B           [2] 00234          lda    #>ExecFCB
.0B1E 20 0260         [6] 00235          jsr    FMS
.0B21 D0 34 (0B57)  [2/3] 00236          bne    Error     ; If error
.                         00237
.0B23 A9 04           [2] 00238          lda    #FMSCLO   ; Close the file
.0B25 8D 0BC2         [4] 00239          sta    ExecFCB+FCBFun
.0B28 A2 C2           [2] 00240          ldx    #<ExecFCB
.0B2A A9 0B           [2] 00241          lda    #>ExecFCB
.0B2C 20 0260         [6] 00242          jsr    FMS
.0B2F D0 26 (0B57)  [2/3] 00243          bne    Error     ; If error
.                         00244
.0B31 A9 01           [2] 00245          lda    #1        ; Set FCB to read from the file
.0B33 8D 0BC4         [4] 00246          sta    ExecFCB+FCBSts
.0B36 A9 00           [2] 00247          lda    #0
.0B38 8D 0BC2         [4] 00248          sta    ExecFCB+FCBFun
.                         00249
.0B3B                     00250 ExecLoop
.0B3B 20 0B5D         [6] 00251          jsr    Readln    ; Read a command
.0B3E B0 17 (0B57)  [2/3] 00252          bcs    Error     ; If error
.                         00253
.0B40 AD 0BC0         [4] 00254          lda    EOF       ; End of file?
.0B43 D0 0C (0B51)  [2/3] 00255          bne    Exit      ; Yes
.                         00256
.0B45 20 024B         [6] 00257          jsr    DOCMND    ; Execute the command
.0B48 F0 F1 (0B3B)  [2/3] 00258          beq    ExecLoop  ; If no error
.                         00259
.0B4A A2 A1           [2] 00260          ldx    #<Aborted  ; Say 'EXEC ABORTED'
.0B4C A9 0B           [2] 00261          lda    #>Aborted
.0B4E 20 021E         [6] 00262          jsr    PSTRNG
.                         00263
.0B51                     00264 Exit
.0B51 20 025D         [6] 00265          jsr    FMSCLS    ; Close FMS
.                         00266
.0B54 4C 0203         [3] 00267          jmp    WARMS     ; Back to FLEX
.                         00268
.0B57                     00269 Error
.0B57 20 023F         [6] 00270          jsr    RPTERR    ; Report Error
.                         00271
.0B5A 4C 0B51         [3] 00272          jmp    Exit
.                         00273
.                         00274 ;
.                         00275 ; Read a line
.                         00276 ;
.0B5D                     00277 Readln
.0B5D A2 00           [2] 00278          ldx    #<CmdBuf  ; Point to input buffer
.0B5F 8E 02A4         [4] 00279          stx    LINBUF
.0B62 A2 01           [2] 00280          ldx    #>CmdBuf
.0B64 8E 02A5         [4] 00281          stx    LINBUF+1
.                         00282
.0B67 A2 00           [2] 00283          ldx    #0        ; Index first character of buffer
.0B69 8E 0BC1         [4] 00284          stx    BufPtr
.                         00285
.0B6C                     00286 ReadLoop
.0B6C A2 C2           [2] 00287          ldx    #<ExecFCB  ; Read a character
.0B6E A9 0B           [2] 00288          lda    #>ExecFCB
.0B70 20 0260         [6] 00289          jsr    FMS
.0B73 D0 14 (0B89)  [2/3] 00290          bne    ChkErr
.                         00291
.0B75 C9 02           [2] 00292          cmp    #2        ; Quit if binary file
.0B77 F0 1E (0B97)  [2/3] 00293          beq    BadFile
.                         00294
.0B79 AE 0BC1         [4] 00295          ldx    BufPtr    ; Store the character
.0B7C 9D 0100         [5] 00296          sta    CmdBuf,X
.0B7F E8              [2] 00297          inx
.0B80 8E 0BC1         [4] 00298          stx    BufPtr
.                         00299
.0B83 C9 0D           [2] 00300          cmp    #$D       ; End of line?
.0B85 D0 E5 (0B6C)  [2/3] 00301          bne    ReadLoop  ; No, go read another
.                         00302
.0B87 18              [2] 00303          clc
.                         00304
.0B88 60              [6] 00305          rts
.                         00306
.0B89                     00307 ChkErr
.0B89 AD 0BC3         [4] 00308          lda    ExecFCB+FCBErr  ; Get error code
.0B8C C9 08           [2] 00309          cmp    #ErrEOF   ; Is it end of file?
.0B8E D0 05 (0B95)  [2/3] 00310          bne    GotError  ; No
.                         00311
.0B90 8D 0BC0         [4] 00312          sta    EOF       ; Indicate end of file
.                         00313
.0B93 18              [2] 00314          clc
.                         00315
.0B94 60              [6] 00316          rts
.                         00317
.0B95                     00318 GotError
.0B95 38              [2] 00319          sec
.                         00320
.0B96 60              [6] 00321          rts
.                         00322
.0B97                     00323 BadFile
.0B97 A2 AE           [2] 00324          ldx    #<Illegal
.0B99 A9 0B           [2] 00325          lda    #>Illegal
.                         00326
.0B9B 20 021E         [6] 00327          jsr    PSTRNG    ; Print String
.                         00328
.0B9E 4C 0B51         [3] 00329          jmp    Exit
.                         00330
.0BA1                     00331 Aborted
.0BA1 455845432041424F    00332          fcc    'EXEC ABORTED'
.0BA9 52544544
.0BAD 04                  00333          fcb    4
.                         00334
.0BAE                     00335 Illegal
.0BAE 494C4C4547414C20    00336          fcc    'ILLEGAL FILE TYPE'
.0BB6 46494C4520545950
.0BBE 45
.0BBF 04                  00337          fcb    4
.                         00338
.0BC0 00                  00339 EOF      fcb    0         ; Not zero if end of file
.                         00340
.0BC1                     00341 BufPtr   rmb    1         ; Index into command line buffer
.                         00342
.0BC2                     00343 ExecFCB

Bill · September 6, 2020, 1:59pm

I got the EXEC command working on the 6502. The code to open the file, copy the “resident” portion to the top of RAM and do the relocation adjustments was substantially larger than the resident portion which reads lines from the script file, passes the commands to FLEX and cleans up after itself. About 256 bytes versus about 150 (plus 320 for the file control block.)

Being twisted, I had to try executing another EXEC command from a script. Well, chaining from one script to another worked, but when the latter finished, the first instance of EXEC never regained control. I had forgotten that FLEX stores some system state in static variables, so the second instance of EXEC causes the first to be forgotten.

I am now pondering whether to find a fix for this as nested scripts can be useful in the same way that nested include files are sometimes handy.

Bill · September 13, 2020, 1:27pm

I made a small “breakthrough” in hacking the TSC BASIC interpreter. The keyword table has finally been identified. It is encoded using the same algorithm as the command table. Each character is doubled, then biased by 26.

No obvious reason for doing this other than hiding the previously disclosed Easter Egg in the command table. It is not faster nor do they save any space in the table.

 16A0                     03409 L_16A0
 16A0 74 84 8E 84         03410          fcb    'G'*2-26,'O'*2-26,'T'*2-26,'O'*2-26,0,1
 16A4 00 01
 16A6 74 84 8C 90         03411          fcb    'G'*2-26,'O'*2-26,'S'*2-26,'U'*2-26,'B'*2-26,0,2
 16AA 6A 00 02
 16AD 8A 70 8C 90         03412          fcb    'R'*2-26,'E'*2-26,'S'*2-26,'U'*2-26,'M'*2-26,'E'*2-26,0,3
 16B1 80 70 00 03
 16B5 8A 70 80 00         03413          fcb    'R'*2-26,'E'*2-26,'M'*2-26,0,5
 16B9 05
 16BA 7E 70 8E 00         03414          fcb    'L'*2-26,'E'*2-26,'T'*2-26,0,6
 16BE 06
 16BF 86 8A 78 82         03415          fcb    'P'*2-26,'R'*2-26,'I'*2-26,'N'*2-26,'T'*2-26,0,7
 16C3 8E 00 07
 16C6 78 82 86 90         03416          fcb    'I'*2-26,'N'*2-26,'P'*2-26,'U'*2-26,'T'*2-26,0,8
 16CA 8E 00 08
 16CD 72 84 8A 00         03417          fcb    'F'*2-26,'O'*2-26,'R'*2-26,0,11
 16D1 0B
 16D2 86 84 7C 70         03418          fcb    'P'*2-26,'O'*2-26,'K'*2-26,'E'*2-26,0,12
 16D6 00 0C
 16D8 82 70 96 8E         03419          fcb    'N'*2-26,'E'*2-26,'X'*2-26,'T'*2-26,0,14
 16DC 00 0E
 16DE 8A 70 68 6E         03420          fcb    'R'*2-26,'E'*2-26,'A'*2-26,'D'*2-26,0,15
 16E2 00 0F
 16E4 8A 70 8E 90         03421          fcb    'R'*2-26,'E'*2-26,'T'*2-26,'U'*2-26,'R'*2-26,'N'*2-26,0,16
 16E8 8A 82 00 10
 16EC 78 72 00 11         03422          fcb    'I'*2-26,'F'*2-26,0,17
 16F0 6E 78 80 00         03423          fcb    'D'*2-26,'I'*2-26,'M'*2-26,0,18
 16F4 12
 16F5 84 82 00 13         03424          fcb    'O'*2-26,'N'*2-26,0,19
 16F9 6E 70 72 00         03425          fcb    'D'*2-26,'E'*2-26,'F'*2-26,0,20
 16FD 14
 16FE 70 82 6E 00         03426          fcb    'E'*2-26,'N'*2-26,'D'*2-26,0,21
 1702 15

Bill · September 15, 2020, 8:58pm

I have been playing with code generation on different processors.

This line of code

    W0 := S1;

sign extends a byte into two.

For the 6502:

                          00037 ;  1 L v S1
 0029 A0 00           [2] 00038          ldy    #0
 002B A6 16           [3] 00039          ldx    S1
 002D 10 01 (0030)  [2/3] 00040          bpl    2f
 002F 88              [2] 00041          dey
 0030                     00042 2:
 0030 98              [2] 00043          tya
                          00044 ;  0 := v W0 -> 1
 0031 86 0D           [3] 00045          stx    W0
 0033 85 0E           [3] 00046          sta    W0+1

For the 6800:

                          00037 *  1 L v S1
 0029 4F              [2] 00038          clra
 002A D6 16           [3] 00039          ldab   S1
 002C 2A 01 (002F)    [4] 00040          bpl    2f
 002E 4A              [2] 00041          deca
 002F                     00042 2:
                          00043 *  0 := v W0 -> 1
 002F 97 0D           [4] 00044          staa   W0
 0031 D7 0E           [4] 00045          stab   W0+1

For the 8080:

                          00037 ;  1 L v S0
 0100 3A 0015        [13] 00038         lda     S0
 0103 6F              [5] 00039         mov     L,A
 0104 17              [4] 00040         ral
 0105 9F              [4] 00041         sbb     A
 0106 67              [5] 00042         mov     H,A
                          00043 ;  0 := v W0 -> 1
 0107 22 000D        [16] 00044         shld    W0

For the 9900:

                          00043 *  1 L v S1
 0052 D020 0037           00044         movb    @S1,R0
 0056 0880                00045         sra     R0,8
                          00046 *  0 := v W0 -> 1
 0058 C800 002E           00047         mov     R0,@W0

For the 68000:

                                  00009 ;  1 L v S1
 00000400 1038 0421               00010         move.b  S1,D0
 00000404 4880                    00011         ext.w   D0
                                  00012 ;  0 := v W0 -> 1
 00000406 31C0 0418               00013         move.w  D0,W0

That almost feels like cheating.

It is interesting that the last four examples have each been ten bytes long.

Real cheating would be the 80386:

    movsx   AX,[S1]
    mov     [W0],AX

For the AVR:

                          00011 ;  1 L v S1
 000060 9160 0116     [2] 00012         lds     R22,S1
 000062 2F76          [1] 00013         mov     R23,R22
 000063 0F77          [1] 00014         lsl     R23
 000064 0B77          [1] 00015         sbc     R23,R23
                          00016 ;  0 := v W0 -> 1
 000065 9360 010D     [2] 00017         sts     W0,R22
 000067 9370 010E     [2] 00018         sts     W0+1,R23

And finally, for the 6809:

                                  00037 *  1 L v S1
 0029 D6 16                   [4] 00038          ldb    S1
 002B 1D                      [2] 00039          sex
                                  00040 *  0 := v W0 -> 1
 002C DD 0D                   [5] 00041          std    W0

which brings up one of my favorite programming jokes:

The Motorola 6809, where SEX is sometimes followed by STD.

Thank you very much. Drive safely…

repose · October 3, 2020, 11:38pm

Bill,
Fantastic work. I’ve enjoyed reading about your journey. I have worked on a 6502 basic compiler myself and considered the same problem with a for/next loop. I’ve also considered writing a Python interpreter; however I would rather go the pyc route, because once implemented, the entire language would be doable. The format is stable now as well, so it’s not an issue. As for c64 questions, I could answer any of them. You’ve asked what you should do next; my vote would be calling an assembly routine with arguments in the 3 registers, then missing functionality could at least be added to make more than toy programs.

Bill · October 8, 2020, 3:01pm

Thank you for your interest.

I am building a Python compiler. As opposed to an interpreter. The rationale is that an older processor cannot hope to be able to host much of a python interpreter. By converting Python code to assembly language, only those portions of the language actually used needs to be included in the generated program.

Is your plan to interpret the pyc or to use it to do part the work of parsing?

Are you local so that we may discuss this further in person once the COVID threat has diminished?

Bill · October 8, 2020, 3:01pm

A new module called the code depessimizer has been added to my ever growing pile of compiler technology. The name is partially out of my amusement with the term “pessimizing compiler” and a belief that a compiler cannot hope to generate the “best code” a human programmer cannot beat, at least on older classic processors. It makes improvements to the tree used when generating assembly language instructions.

An example of it in action:

                          00031 * W0 := 1 + W1 + 2 + W2 - 3;
                          00032
                          00033 *   *  0 := v W0 -> 1
                          00034 *   *  1 L r 2
                          00035
                          00036 *      *  2 L c 1 -> 3
                          00037 *      *  3 + v W1 -> 4
                          00038 *      *  4 + c 2 -> 5
                          00039 *      *  5 + v W2 -> 6
                          00040 *      *  6 - c 3
                          00041
                          00042
                          00043 *  delete  *  4 + c 2 -> 5
                          00044 *  delete  *  6 - c 3
                          00045 *  merge  *  3 + v W1 -> 5
                          00046
                          00047 *   *  0 := v W0 -> 1
                          00048 *   *  1 L r 2
                          00049
                          00050 *      *  2 L v W1 -> 5
                          00051 *      *  5 + v W2
                          00052
                          00053
                          00054 *  1 L r 2
                          00055 *  2 L v W1 -> 5
                          00056 *  5 + v W2
 0029 D6 10           [3] 00057          ldab   W1+1
 002B DB 12           [3] 00058          addb   W2+1
 002D 96 0F           [3] 00059          ldaa   W1
 002F 99 11           [3] 00060          adca   W2
                          00061 *  0 := v W0 -> 1
 0031 97 0D           [4] 00062          staa   W0
 0033 D7 0E           [4] 00063          stab   W0+1

Bill · October 9, 2020, 8:49pm

More work on the depessimizer. Now variables can cancel out…

                          00320 ; 00013     W0 := W1 + W2 - W1;
                          00321 *   *  0 := v W0 -> 1
                          00322 *   *  1 L r 2
                          00323
                          00324 *      *  2 L v W1 -> 3
                          00325 *      *  3 + v W2 -> 4
                          00326 *      *  4 - v W1
                          00327 *  delete  *  4 - v W1
                          00328 *  change  *  2 L c 0 -> 3
                          00329 *  merge  *  3 + v W2
                          00330 *  delete  *  2 L v W2
                          00331
                          00332 *   *  0 := v W0 -> 1
                          00333 *   *  1 L v W2
                          00334
                          00335
                          00336 *  1 L v W2
 0106 DE 40           [4] 00337          ldx    W2
                          00338 *  0 := v W0 -> 1
 0108 DF 38           [5] 00339          stx    W0

While somebody is not likely to write code like this, other transformations may give this as a result.

repose · October 10, 2020, 10:29pm

Hi Bill,
My bad! I didn’t mean to type “also” - yes you are writing a compiler :). Your rationale is good. My dream is to be able to edit and run Python directly on an 8 bit - certainly a challenge! I had considered the consequences, but then realized I could do incremental compiling (not JIT, but differential compiling as you edit the code). Sorry, I’m a Canadian, I could give you my Skype I guess.
I was going to comment on writing efficient code - there are machine learning techniques now that can do that, and I’m a Data Scientist.
Another project I’m interested in is writing real a.i. code for an 8 bit, perhaps a handwriting recognizer. Such things can use an 8-bit predictor with nearly the same accuracy as PC, but they use a special number format called a posit, or positional bit. It’s a bit slow, but not as slow as using 16-bit numbers.
I think what you’re doing is like a peephole optimizer.

Bill · October 12, 2020, 1:48am

If your target system supports overlays, you have at least a fighting chance to load parts of an interpreter as they are needed. 64 Kbytes of address space is not much if you are not writing everything in assembly language. And even then, Python is huge.

Sort of, but it operates on a parse tree. I will also need one to make a pass over the resulting assembly language code.

Bill · October 12, 2020, 1:49am

Perhaps this complicated expression will illustrate the process.

Matching additions and subtractions of a variable are found; the second is deleted and the first converted to a constant zero. These constant zeroes are later folded together.

                          00320 ; 00013     W0 := 2 - W1 + W2 - W1 + W2 + W1 - W2 - 1;
                          00321 *   *  0 := v W0 -> 1
                          00322 *   *  1 L r 2
                          00323
                          00324 *      *  2 L c 2 -> 3
                          00325 *      *  3 - v W1 -> 4
                          00326 *      *  4 + v W2 -> 5
                          00327 *      *  5 - v W1 -> 6
                          00328 *      *  6 + v W2 -> 7
                          00329 *      *  7 + v W1 -> 8
                          00330 *      *  8 - v W2 -> 9
                          00331 *      *  9 - c 1
                          00332 *  delete  *  7 + v W1 -> 8
                          00333 *  change  *  3 - c 0 -> 4
                          00334 *  delete  *  8 - v W2 -> 9
                          00335 *  change  *  4 + c 0 -> 5
                          00336 *  delete  *  3 - c 0 -> 4
                          00337 *  delete  *  4 + c 0 -> 5
                          00338 *  delete  *  9 - c 1
                          00339
                          00340 *   *  0 := v W0 -> 1
                          00341 *   *  1 L r 2
                          00342
                          00343 *      *  2 L c 1 -> 5
                          00344 *      *  5 - v W1 -> 6
                          00345 *      *  6 + v W2
                          00346
                          00347
                          00348 *  1 L r 2
                          00349 *  2 L c 1 -> 5
                          00350 *  5 - v W1 -> 6
 0106 4F              [2] 00351          clra
 0107 C6 01           [2] 00352          ldab   #1
 0109 D0 35           [3] 00353          subb   W1+1
 010B 92 34           [3] 00354          sbca   W1
                          00355 *  6 + v W2
 010D DB 41           [3] 00356          addb   W2+1
 010F 99 40           [3] 00357          adca   W2
                          00358 *  0 := v W0 -> 1
 0111 97 38           [4] 00359          staa   W0
 0113 D7 39           [4] 00360          stab   W0+1

Bill · October 18, 2020, 7:03pm

Borland Turbo Pascal permits AND, OR and XOR operations for many compatible types.

This is the beginning of code generation for OR and XOR.

For operations with two variables, OR and XOR are very similar.

Of particular interest is the first assignment. Unlike addition and subtraction, ORing and XORing extended signs never accumulate or cancel. It is sufficient to determine the result of operating on the two bytes and sign extend that.

                          00320 ; 00013     W0 := S1 or S2;
 0106 D6 36           [3] 00321          ldab   S1
 0108 DA 3A           [3] 00322          orab   S2
 010A 86 7F           [2] 00323          ldaa   #$7F      ; Thanks Mike B!
 010C 11              [2] 00324          cba
 010D 82 7F           [2] 00325          sbca   #$7F
 010F 97 38           [4] 00326          staa   W0
 0111 D7 39           [4] 00327          stab   W0+1
                          00328 ; 00014     W0 := W1 or S2;
 0113 D6 3A           [3] 00329          ldab   S2
 0115 86 7F           [2] 00330          ldaa   #$7F      ; Thanks Mike B!
 0117 11              [2] 00331          cba
 0118 82 7F           [2] 00332          sbca   #$7F
 011A DA 35           [3] 00333          orab   W1+1
 011C 9A 34           [3] 00334          oraa   W1
 011E 97 38           [4] 00335          staa   W0
 0120 D7 39           [4] 00336          stab   W0+1
                          00337 ; 00015     W0 := S1 or W2;
 0122 D6 36           [3] 00338          ldab   S1
 0124 86 7F           [2] 00339          ldaa   #$7F      ; Thanks Mike B!
 0126 11              [2] 00340          cba
 0127 82 7F           [2] 00341          sbca   #$7F
 0129 DA 41           [3] 00342          orab   W2+1
 012B 9A 40           [3] 00343          oraa   W2
 012D 97 38           [4] 00344          staa   W0
 012F D7 39           [4] 00345          stab   W0+1
                          00346 ; 00016     W0 := B1 or B2;
 0131 D6 18           [3] 00347          ldab   B1
 0133 DA 1A           [3] 00348          orab   B2
 0135 4F              [2] 00349          clra
 0136 97 38           [4] 00350          staa   W0
 0138 D7 39           [4] 00351          stab   W0+1
                          00352 ; 00017     W0 := W1 or B2;
 013A D6 35           [3] 00353          ldab   W1+1
 013C DA 1A           [3] 00354          orab   B2
 013E 96 34           [3] 00355          ldaa   W1
 0140 97 38           [4] 00356          staa   W0
 0142 D7 39           [4] 00357          stab   W0+1
                          00358 ; 00018     W0 := B1 or W2;
 0144 D6 18           [3] 00359          ldab   B1
 0146 DA 41           [3] 00360          orab   W2+1
 0148 96 40           [3] 00361          ldaa   W2
 014A 97 38           [4] 00362          staa   W0
 014C D7 39           [4] 00363          stab   W0+1
                          00364 ; 00019     W0 := W1 or W2;
 014E D6 35           [3] 00365          ldab   W1+1
 0150 DA 41           [3] 00366          orab   W2+1
 0152 96 34           [3] 00367          ldaa   W1
 0154 9A 40           [3] 00368          oraa   W2
 0156 97 38           [4] 00369          staa   W0
 0158 D7 39           [4] 00370          stab   W0+1
                          00371 ; 00020     B0 := B1 or B2;
 015A 96 18           [3] 00372          ldaa   B1
 015C 9A 1A           [3] 00373          oraa   B2
 015E 97 16           [4] 00374          staa   B0
                          00375 ; 00021     W0 := S1 xor S2;
 0160 D6 36           [3] 00376          ldab   S1
 0162 D8 3A           [3] 00377          eorb   S2
 0164 86 7F           [2] 00378          ldaa   #$7F      ; Thanks Mike B!
 0166 11              [2] 00379          cba
 0167 82 7F           [2] 00380          sbca   #$7F
 0169 97 38           [4] 00381          staa   W0
 016B D7 39           [4] 00382          stab   W0+1

The depessimizer needed a little bit of adjustment.

B1 or B3 - B1 is not necessarily the same as B3.

                          00320 ; 00013     B0 := B1 or B3 - B1;
                          00321 *   *  0 := v B0 -> 1
                          00322 *   *  1 L r 2
                          00323
                          00324 *      *  2 L v B1 -> 3
                          00325 *      *  3 | v B3 -> 4
                          00326 *      *  4 - v B1
                          00327 *  1 L r 2
                          00328 *  2 L v B1 -> 3
                          00329 *  3 | v B3 -> 4
 0106 96 18           [3] 00330          ldaa   B1
 0108 9A 1C           [3] 00331          oraa   B3
                          00332 *  4 - v B1
 010A 90 18           [3] 00333          suba   B1
                          00334 *  0 := v B0 -> 1
 010C 97 16           [4] 00335          staa   B0

However, variables in subexpressions on either side of an OR operation are allowed to cancel out:

                          00337 ; 00014     B0 := B2 + B1 - B2 or B3 + B1 + B2 - B1;
                          00338 *   *  0 := v B0 -> 1
                          00339 *   *  1 L r 2
                          00340
                          00341 *      *  2 L v B2 -> 3
                          00342 *      *  3 + v B1 -> 4
                          00343 *      *  4 - v B2 -> 5
                          00344 *      *  5 | v B3 -> 6
                          00345 *      *  6 + v B1 -> 7
                          00346 *      *  7 + v B2 -> 8
                          00347 *      *  8 - v B1
                          00348 *  delete  *  4 - v B2 -> 5
                          00349 *  change  *  2 L c 0 -> 3
                          00350 *  delete  *  8 - v B1
                          00351 *  change  *  6 + c 0 -> 7
                          00352 *  delete  *  6 + c 0 -> 7
                          00353 *  merge  *  3 + v B1 -> 5
                          00354
                          00355 *   *  0 := v B0 -> 1
                          00356 *   *  1 L r 2
                          00357
                          00358 *      *  2 L v B1 -> 5
                          00359 *      *  5 | v B3 -> 7
                          00360 *      *  7 + v B2
                          00361
                          00362
                          00363 *  1 L r 2
                          00364 *  2 L v B1 -> 5
                          00365 *  5 | v B3 -> 7
 0110 96 18           [3] 00366          ldaa   B1
 0112 9A 1C           [3] 00367          oraa   B3
                          00368 *  7 + v B2
 0114 DB 1A           [3] 00369          addb   B2
                          00370 *  0 := v B0 -> 1
 0116 97 16           [4] 00371          staa   B0

Next up is generating code to OR and XOR a variable with a constant. This will present many opportunities to save a few cycles or bytes here and there.

Edit: Found and fixed a bug in B0 := B1 or B3 - B1 example

Bill · October 30, 2020, 7:27pm

These are some of the noteworthy examples from ORing or XORing with constants.

Taking advantage of the fact that anything ORed with one is one:

                          00321 ; 00013     W0 := W1 or $FF00;
 0106 D6 35           [3] 00322          ldab   W1+1
 0108 86 FF           [2] 00323          ldaa   #$FF
 010A 97 38           [4] 00324          staa   W0
 010C D7 39           [4] 00325          stab   W0+1
                          00326 ; 00014     W0 := W1 or $FF;
 010E 96 34           [3] 00327          ldaa   W1
 0110 C6 FF           [2] 00328          ldab   #$FF
 0112 97 38           [4] 00329          staa   W0
 0114 D7 39           [4] 00330          stab   W0+1

Something XORed with zero is unchanged:

                          00331 ; 00015     W0 := B1 xor $9900;
 0116 D6 18           [3] 00332          ldab   B1
 0118 86 99           [2] 00333          ldaa   #153
 011A 97 38           [4] 00334          staa   W0
 011C D7 39           [4] 00335          stab   W0+1

XORing with one flips the bit. On the 6800, we have an instruction for that. Same speed but only a single byte:

                          00336 ; 00016     W0 := W1 xor $FF00;
 011E 96 34           [3] 00337          ldaa   W1
 0120 D6 35           [3] 00338          ldab   W1+1
 0122 43              [2] 00339          coma
 0123 97 38           [4] 00340          staa   W0
 0125 D7 39           [4] 00341          stab   W0+1
                          00342 ; 00017     W0 := W1 xor $FF;
 0127 96 34           [3] 00343          ldaa   W1
 0129 D6 35           [3] 00344          ldab   W1+1
 012B 53              [2] 00345          comb
 012C 97 38           [4] 00346          staa   W0
 012E D7 39           [4] 00347          stab   W0+1

This is the usual sign extension:

                          00348 ; 00018     W0 := S1;
 0130 D6 36           [3] 00349          ldab   S1
 0132 86 7F           [2] 00350          ldaa   #$7F      ; Thanks Mike B!
 0134 11              [2] 00351          cba
 0135 82 7F           [2] 00352          sbca   #$7F
 0137 97 38           [4] 00353          staa   W0
 0139 D7 39           [4] 00354          stab   W0+1

No need to actually do the sign extension here…

                          00355 ; 00019     W0 := S1 or $FF00;
 013B D6 36           [3] 00356          ldab   S1
 013D 86 FF           [2] 00357          ldaa   #$FF
 013F 97 38           [4] 00358          staa   W0
 0141 D7 39           [4] 00359          stab   W0+1

And here we want the inverse of that:

                          00360 ; 00020     W0 := S1 xor $FF00;
 0143 D6 36           [3] 00361          ldab   S1
 0145 86 7F           [2] 00362          ldaa   #$7F      ; Thanks Mike B!
 0147 11              [2] 00363          cba
 0148 89 80           [2] 00364          adca   #$80
 014A 97 38           [4] 00365          staa   W0
 014C D7 39           [4] 00366          stab   W0+1

Bill · March 14, 2021, 6:36pm

Gotta love Python.

I need to get to thinking of Python first as the go-to tool.

I have pondered for a long time how to estimate the number of digits needed to represent a binary number in decimal.

While thinking during a shower about how to write a test program and particularly how to format a number with a comma every three digits for easier counting, the answer became obvious: Python!

I = 0
while I <= 180:
    print(I, "  ", "{:,}".format(2 ** I))
    I = I + 1

0    1
1    2
2    4
3    8
4    16
5    32
6    64
7    128
8    256
9    512
10    1,024
11    2,048
12    4,096
13    8,192
14    16,384
15    32,768
16    65,536
17    131,072
18    262,144
19    524,288
20    1,048,576
21    2,097,152
22    4,194,304
23    8,388,608
24    16,777,216
25    33,554,432
26    67,108,864
27    134,217,728
28    268,435,456
29    536,870,912
30    1,073,741,824
31    2,147,483,648
32    4,294,967,296
33    8,589,934,592
34    17,179,869,184
35    34,359,738,368
36    68,719,476,736
37    137,438,953,472
38    274,877,906,944
39    549,755,813,888
40    1,099,511,627,776
41    2,199,023,255,552
42    4,398,046,511,104
43    8,796,093,022,208
44    17,592,186,044,416
45    35,184,372,088,832
46    70,368,744,177,664
47    140,737,488,355,328
48    281,474,976,710,656
49    562,949,953,421,312
50    1,125,899,906,842,624
51    2,251,799,813,685,248
52    4,503,599,627,370,496
53    9,007,199,254,740,992
54    18,014,398,509,481,984
55    36,028,797,018,963,968
56    72,057,594,037,927,936
57    144,115,188,075,855,872
58    288,230,376,151,711,744
59    576,460,752,303,423,488
60    1,152,921,504,606,846,976
61    2,305,843,009,213,693,952
62    4,611,686,018,427,387,904
63    9,223,372,036,854,775,808
64    18,446,744,073,709,551,616
65    36,893,488,147,419,103,232
66    73,786,976,294,838,206,464
67    147,573,952,589,676,412,928
68    295,147,905,179,352,825,856
69    590,295,810,358,705,651,712
70    1,180,591,620,717,411,303,424
71    2,361,183,241,434,822,606,848
72    4,722,366,482,869,645,213,696
73    9,444,732,965,739,290,427,392
74    18,889,465,931,478,580,854,784
75    37,778,931,862,957,161,709,568
76    75,557,863,725,914,323,419,136
77    151,115,727,451,828,646,838,272
78    302,231,454,903,657,293,676,544
79    604,462,909,807,314,587,353,088
80    1,208,925,819,614,629,174,706,176
81    2,417,851,639,229,258,349,412,352
82    4,835,703,278,458,516,698,824,704
83    9,671,406,556,917,033,397,649,408
84    19,342,813,113,834,066,795,298,816
85    38,685,626,227,668,133,590,597,632
86    77,371,252,455,336,267,181,195,264
87    154,742,504,910,672,534,362,390,528
88    309,485,009,821,345,068,724,781,056
89    618,970,019,642,690,137,449,562,112
90    1,237,940,039,285,380,274,899,124,224
91    2,475,880,078,570,760,549,798,248,448
92    4,951,760,157,141,521,099,596,496,896
93    9,903,520,314,283,042,199,192,993,792
94    19,807,040,628,566,084,398,385,987,584
95    39,614,081,257,132,168,796,771,975,168
96    79,228,162,514,264,337,593,543,950,336
97    158,456,325,028,528,675,187,087,900,672
98    316,912,650,057,057,350,374,175,801,344
99    633,825,300,114,114,700,748,351,602,688
100    1,267,650,600,228,229,401,496,703,205,376
101    2,535,301,200,456,458,802,993,406,410,752
102    5,070,602,400,912,917,605,986,812,821,504
103    10,141,204,801,825,835,211,973,625,643,008
104    20,282,409,603,651,670,423,947,251,286,016
105    40,564,819,207,303,340,847,894,502,572,032
106    81,129,638,414,606,681,695,789,005,144,064
107    162,259,276,829,213,363,391,578,010,288,128
108    324,518,553,658,426,726,783,156,020,576,256
109    649,037,107,316,853,453,566,312,041,152,512
110    1,298,074,214,633,706,907,132,624,082,305,024
111    2,596,148,429,267,413,814,265,248,164,610,048
112    5,192,296,858,534,827,628,530,496,329,220,096
113    10,384,593,717,069,655,257,060,992,658,440,192
114    20,769,187,434,139,310,514,121,985,316,880,384
115    41,538,374,868,278,621,028,243,970,633,760,768
116    83,076,749,736,557,242,056,487,941,267,521,536
117    166,153,499,473,114,484,112,975,882,535,043,072
118    332,306,998,946,228,968,225,951,765,070,086,144
119    664,613,997,892,457,936,451,903,530,140,172,288
120    1,329,227,995,784,915,872,903,807,060,280,344,576
121    2,658,455,991,569,831,745,807,614,120,560,689,152
122    5,316,911,983,139,663,491,615,228,241,121,378,304
123    10,633,823,966,279,326,983,230,456,482,242,756,608
124    21,267,647,932,558,653,966,460,912,964,485,513,216
125    42,535,295,865,117,307,932,921,825,928,971,026,432
126    85,070,591,730,234,615,865,843,651,857,942,052,864
127    170,141,183,460,469,231,731,687,303,715,884,105,728
128    340,282,366,920,938,463,463,374,607,431,768,211,456
129    680,564,733,841,876,926,926,749,214,863,536,422,912
130    1,361,129,467,683,753,853,853,498,429,727,072,845,824
131    2,722,258,935,367,507,707,706,996,859,454,145,691,648
132    5,444,517,870,735,015,415,413,993,718,908,291,383,296
133    10,889,035,741,470,030,830,827,987,437,816,582,766,592
134    21,778,071,482,940,061,661,655,974,875,633,165,533,184
135    43,556,142,965,880,123,323,311,949,751,266,331,066,368
136    87,112,285,931,760,246,646,623,899,502,532,662,132,736
137    174,224,571,863,520,493,293,247,799,005,065,324,265,472
138    348,449,143,727,040,986,586,495,598,010,130,648,530,944
139    696,898,287,454,081,973,172,991,196,020,261,297,061,888
140    1,393,796,574,908,163,946,345,982,392,040,522,594,123,776
141    2,787,593,149,816,327,892,691,964,784,081,045,188,247,552
142    5,575,186,299,632,655,785,383,929,568,162,090,376,495,104
143    11,150,372,599,265,311,570,767,859,136,324,180,752,990,208
144    22,300,745,198,530,623,141,535,718,272,648,361,505,980,416
145    44,601,490,397,061,246,283,071,436,545,296,723,011,960,832
146    89,202,980,794,122,492,566,142,873,090,593,446,023,921,664
147    178,405,961,588,244,985,132,285,746,181,186,892,047,843,328
148    356,811,923,176,489,970,264,571,492,362,373,784,095,686,656
149    713,623,846,352,979,940,529,142,984,724,747,568,191,373,312
150    1,427,247,692,705,959,881,058,285,969,449,495,136,382,746,624
151    2,854,495,385,411,919,762,116,571,938,898,990,272,765,493,248
152    5,708,990,770,823,839,524,233,143,877,797,980,545,530,986,496
153    11,417,981,541,647,679,048,466,287,755,595,961,091,061,972,992
154    22,835,963,083,295,358,096,932,575,511,191,922,182,123,945,984
155    45,671,926,166,590,716,193,865,151,022,383,844,364,247,891,968
156    91,343,852,333,181,432,387,730,302,044,767,688,728,495,783,936
157    182,687,704,666,362,864,775,460,604,089,535,377,456,991,567,872
158    365,375,409,332,725,729,550,921,208,179,070,754,913,983,135,744
159    730,750,818,665,451,459,101,842,416,358,141,509,827,966,271,488
160    1,461,501,637,330,902,918,203,684,832,716,283,019,655,932,542,976
161    2,923,003,274,661,805,836,407,369,665,432,566,039,311,865,085,952
162    5,846,006,549,323,611,672,814,739,330,865,132,078,623,730,171,904
163    11,692,013,098,647,223,345,629,478,661,730,264,157,247,460,343,808
164    23,384,026,197,294,446,691,258,957,323,460,528,314,494,920,687,616
165    46,768,052,394,588,893,382,517,914,646,921,056,628,989,841,375,232
166    93,536,104,789,177,786,765,035,829,293,842,113,257,979,682,750,464
167    187,072,209,578,355,573,530,071,658,587,684,226,515,959,365,500,928
168    374,144,419,156,711,147,060,143,317,175,368,453,031,918,731,001,856
169    748,288,838,313,422,294,120,286,634,350,736,906,063,837,462,003,712
170    1,496,577,676,626,844,588,240,573,268,701,473,812,127,674,924,007,424
171    2,993,155,353,253,689,176,481,146,537,402,947,624,255,349,848,014,848
172    5,986,310,706,507,378,352,962,293,074,805,895,248,510,699,696,029,696
173    11,972,621,413,014,756,705,924,586,149,611,790,497,021,399,392,059,392
174    23,945,242,826,029,513,411,849,172,299,223,580,994,042,798,784,118,784
175    47,890,485,652,059,026,823,698,344,598,447,161,988,085,597,568,237,568
176    95,780,971,304,118,053,647,396,689,196,894,323,976,171,195,136,475,136
177    191,561,942,608,236,107,294,793,378,393,788,647,952,342,390,272,950,272
178    383,123,885,216,472,214,589,586,756,787,577,295,904,684,780,545,900,544
179    766,247,770,432,944,429,179,173,513,575,154,591,809,369,561,091,801,088
180    1,532,495,540,865,888,858,358,347,027,150,309,183,618,739,122,183,602,176

Divide the number of bits by ten, multiply by three and then add one gets the answer.