• Rise from your gwave!

Hex to Bin

Discussion in 'Saturn Dev' started by paul_met, Dec 25, 2018.

  1. paul_met

    paul_met New Member

    Greetings. Maybe someone has an algorithm for converting from HEX to BIN (for SH2 asm)?
    For example:
    Code:
    AD (hex)= 10101101 (bin)
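    The binary value must be written out in HEX format, i.e. one hex digit (0 or 1) per source bit, so the byte 0xAD becomes the 32-bit value 0x10101101.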
    ---------------------------------------------
    This is my version of the code. Perhaps someone will have a smaller code.
    Code:
    00  MOV #0xXX, R6 (number of convertible bytes)
    02  MOV #0x00, R5 (jump offset #1)
    04  MOV.B @R0 +, R1 (reading curent byte)
    06  SHAR R1
    08  MOVT R2 (get low bit)
    0A  SHAR R1
    0C  MOVT R3 (get high bit)
    0E  SHLL2 R3
    10  SHLL2 R3
    12  BRAF R5 + 0x16 (jump to next step)
    14  OR R3, R2
    16  MOV R2, R4 (get fourth byte)
    18  BRA 0x06 (repeat conversion)
    1A  MOV #0x06, R5 (jump offset #2)
    1C  SHLL8 R2
    1E  OR R2, R4 (get third byte)
    20  BRA 0x06 (repeat conversion)
    22  MOV #0x0E, R5 (jump offset #3)
    24  SHLL16 R2
    26  OR R2, R4 (get second byte)
    28  BRA 0x06 (repeat conversion)
    2A  MOV #0x16, R5 (jump offset #4)
    2C  SHLL8 R2
    2E  SHLL16 R2
    30  OR R2, R4 (get first byte)
    32  DT R6  (cycle identificator)
    34  MOV.L R4, @R7 (writing 4 bytes)
    36  BF/S + 0x02 (return at beginning)
    38  ADD #0x04, R7 (writing step)
    
     
    Last edited: Dec 27, 2018
  2. antime

    antime Extra Hard Mid Boss

    Use a look-up table? 4- or 8-bit, depending on what you want to optimize for.
     
  3. paul_met

    paul_met New Member

    To convert 1 bpp tiles to 4 bpp.
     
  4. antime

    antime Extra Hard Mid Boss

    I'm still not sure I understand what you're asking, but this converts 1-4 ASCII hex digits to a number (uppercase assumed). Assembly output compiled with -Os.

    Code:
    /*
     * Convert `len` ASCII hex digits starting at `ascii` into an integer.
     * Digits are consumed most-significant first: each one shifts the running
     * result left by 4 bits and ORs in the new nybble.
     *
     * NOTE(review): the table index (character - '0') is not range-checked.
     * Only '0'-'9' and uppercase 'A'-'F' map to digit values; any character
     * outside '0'..'F' (lowercase 'a'-'f' included) indexes outside the table.
     */
    uint32_t cvt(const char *ascii, unsigned int len)
    {
        /* Indexed by (character - '0'): entries 0-9 are '0'-'9', the seven
         * zero entries pad over ':' .. '@', entries 17-22 are 'A'-'F'. */
        static const int8_t table[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
            0, 0, 0, 0, 0, 0, 0,
            10, 11, 12, 13, 14, 15 };
        uint32_t result = 0;
        for (unsigned int i = 0; i < len; i++) {
            int8_t nybble = table[ascii[i] - '0'];
            result <<= 4;       /* make room for the next digit */
            result |= nybble;
        }
        return result;
    }
    
    Code:
    00000000 <_cvt>:
      0:   d2 07     mov.l   20 <_cvt+0x20>,r2   ! 0 <_cvt>
      2:   e0 00     mov   #0,r0
      4:   75 01     add   #1,r5
      6:   45 10     dt   r5
      8:   8b 01     bf   e <_cvt+0xe>
      a:   00 0b     rts  
      c:   00 09     nop  
      e:   61 44     mov.b   @r4+,r1
      10:   40 08     shll2   r0
      12:   31 2c     add   r2,r1
      14:   71 d0     add   #-48,r1
      16:   61 10     mov.b   @r1,r1
      18:   40 08     shll2   r0
      1a:   af f4     bra   6 <_cvt+0x6>
      1c:   20 1b     or   r1,r0
      1e:   00 09     nop  
      20:   00 00     .word 0x0000
    
     
  5. paul_met

    paul_met New Member

    For example, there is a tile font in the format of 1 bit per pixel (monochrome). I need to convert it to 4 bits per pixel (16 colors).
     
  6. antime

    antime Extra Hard Mid Boss

    So not at all what you originally asked about... Assuming the foreground and background colors are constant, a pure C implementation is slightly longer. It might be possible to squeeze a couple of instructions out of the assembly by manual register allocation, but this is trivially unit-testable on the host.
    Code:
    /* 4-bit values written for a set (FG) and a clear (BG) source bit. */
    #define FG 0x0F
    #define BG 0x00
    
    /*
     * Expand `len` bytes from `input` into `output`, turning every source bit
     * into one 4-bit value (FG if the bit is set, BG if it is clear).
     * Each input byte produces 4 output bytes, so `output` must have room
     * for 4 * len bytes. The most significant bit pair of a source byte ends
     * up in output[0], the least significant pair in output[3].
     * A `len` of zero or less writes nothing.
     */
    void cvt(const char *input, int len, char *output)
    {
        /* Indexed by a 2-bit pair: the pair's upper bit selects the high
         * nybble of the output byte, its lower bit selects the low nybble. */
        const char table[4] = {
            (BG << 4) | BG,
            (BG << 4) | FG,
            (FG << 4) | BG,
            (FG << 4) | FG
        };
    
        for (int i = 0; i < len; i++) {
            unsigned int bits = *input++;
            /* Consume the byte two bits at a time from the bottom, filling
             * the 4 output bytes from last to first. */
            for (int j = 3; j >= 0; j--) {
                output[j] = table[bits & 3];
                bits >>= 2;
            }
            output += 4;
        }
    }
    
    Code:
    00000000 <_cvt>:
      0:   d1 0f     mov.l   40 <_cvt+0x40>,r1   ! ff0ff
      2:   7f fc     add   #-4,r15
      4:   45 11     cmp/pz   r5
      6:   2f 12     mov.l   r1,@r15
      8:   61 53     mov   r5,r1
      a:   8d 01     bt.s   10 <_cvt+0x10>
      c:   71 01     add   #1,r1
      e:   e1 01     mov   #1,r1
      10:   41 10     dt   r1
      12:   8f 02     bf.s   1a <_cvt+0x1a>
      14:   63 63     mov   r6,r3
      16:   00 0b     rts   
      18:   7f 04     add   #4,r15
      1a:   74 01     add   #1,r4
      1c:   60 43     mov   r4,r0
      1e:   70 f0     add   #-16,r0
      20:   84 0f     mov.b   @(15,r0),r0
      22:   73 04     add   #4,r3
      24:   65 03     mov   r0,r5
      26:   66 33     mov   r3,r6
      28:   e2 04     mov   #4,r2
      2a:   e7 03     mov   #3,r7
      2c:   27 59     and   r5,r7
      2e:   60 73     mov   r7,r0
      30:   07 fc     mov.b   @(r0,r15),r7
      32:   42 10     dt   r2
      34:   23 74     mov.b   r7,@-r3
      36:   8f f8     bf.s   2a <_cvt+0x2a>
      38:   45 09     shlr2   r5
      3a:   af ea     bra   12 <_cvt+0x12>
      3c:   41 10     dt   r1
      3e:   00 09     nop   
      40:   00 0f     mac.l   @r0+,@r0+
      42:   f0 ff     .word 0xf0ff
    
     
  7. paul_met

    paul_met New Member

    Some strange syntax I do not understand. What does this mean?
    Code:
    cvt
    .word 0xf0ff
    
     
  8. antime

    antime Extra Hard Mid Boss

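    0x40-0x43 is the lookup table (the four bytes 00 0f f0 ff). It is data, not code; the disassembler simply decoded it as an instruction and a .word.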
     
  9. paul_met

    paul_met New Member

    Significant optimization, if you convert 1 bpp reverse-order to 4 bpp (-12 instructions and -2 registers).
    Code:
    00    MOV #0xXX, R4    (number of convertible bytes)
    02    MOV #0x00, R3    (constant)
    04    MOV.B @R0 +, R1 (reading curent byte)
    06    SHAR R1 
    08    MOVT R2              (get high bit)
    0A    SHLL2 R2 
    0C    SHLL2 R2 
    0E    SHAR R1 
    10    ADDC R2, R3        (get bytes)
    12    CMP/PL R1            (checking end of bit conversion)
    14    BF + 0x1A            (jump to writing 4 bytes)
    16    BT/S + 0x06        (continue bit conversion)
    18    SHLL8 R3 
    1A    MOV.L R3, @R5    (writing 4 bytes)
    1C    DT R4                 (decrement of cycles)
    1E    BF/S + 0x02        (return at beginning)
    20    ADD #0x04, R5    (writing step)
     
  10. antime

    antime Extra Hard Mid Boss

    Your "end of bit conversion" condition doesn't work. Depending on the MSB of the byte loaded from @r0, the cmp/pl is either always false, or always true (loads are sign-extending, and shar is sign-preserving, and the comparison is signed). Also, if any bit pattern is valid you can't early-out.

    Ignoring that, you could remove one register by using rotates instead of adds:
    Code:
    00000000 <cnv>:
      0:   e4 63     mov   #99,r4
      2:   e2 00     mov   #0,r2
      4:   61 04     mov.b   @r0+,r1
      6:   41 21     shar   r1
      8:   42 24     rotcl   r2
      a:   42 00     shll   r2
      c:   42 08     shll2   r2
      e:   41 21     shar   r1
      10:   42 24     rotcl   r2
      12:   41 15     cmp/pl   r1
      14:   8b 01     bf   1a <cnv+0x1a>
      16:   8d f6     bt.s   6 <cnv+0x6>
      18:   42 18     shll8   r2
      1a:   25 22     mov.l   r2,@r5
      1c:   44 10     dt   r4
      1e:   8f f0     bf.s   2 <cnv+0x2>
      20:   75 04     add   #4,r5
    
    Getting the data in the right order would just require shifting the byte left by 24, and then shifting out the MSB.
     
    Last edited: Jan 2, 2019
  11. paul_met

    paul_met New Member

    Yes, I forgot to add EXTU.B R1, R1. The code should look like this:
    Code:
    00    MOV #0xXX, R4    (number of convertible bytes)
    02    MOV #0x00, R3    (constant)
    04    MOV.B @R0 +, R1    (reading curent byte)
    06    EXTU.B R1, R1  
    08    SHAR R1  
    0A    MOVT R2    (get high bit)
    0C    SHLL2 R2  
    0E    SHLL2 R2  
    10    SHAR R1  
    12    ADDC R2, R3    (get bytes)
    14    CMP/PL R1    (checking end of bit conversion)
    16    BF + 0x1C    (jump to writing 4 bytes)
    18    BT/S + 0x08    (continue bit conversion)
    1A    SHLL8 R3  
    1C    MOV.L R3, @R5    (writing 4 bytes)
    1E    DT R4    decrement of cycles
    20    BF/S + 0x02    (return at beginning)
    22    ADD #0x04, R5    (writing step)
    
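    But your code gives the wrong result after the second cycle: the rotcl/shll/shll2/rotcl sequence shifts the bits already collected in r2 left by 5 on every pass, on top of the shll8, so the earlier bits end up in the wrong positions.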
     
  12. antime

    antime Extra Hard Mid Boss

    Because I was bored, here's another solution. With the table it's longer, but should run quicker. Obviously when optimizing for speed you can't beat a full 1K table, but I think this is a reasonable compromise.
    Code:
    ! cnv: expand packed 1-bit-per-pixel data to 4 bits per pixel.
    ! Every source byte becomes one 32-bit longword made of eight nybbles,
    ! one per source bit (0 or 1), most significant bit first; for example
    ! the byte 0xAD (1010 1101b) is stored as 0x10101101.
    !
    ! Parameters:
    ! r4 = source
    ! r5 = length (>= 1; dt decrements before the test, so 0 would wrap)
    ! r6 = dest (written with mov.l, so it must be longword-aligned)
    !
    ! On return r4 and r6 point past the consumed/produced data and r5 is 0.
    
    ! Local registers:
    ! r0 = table pointer
    ! r1 = constant 0x1e (mask for a nybble already scaled to a word offset)
    ! r2 = low nybble * 2, then its table entry
    ! r3 = high nybble * 2, then its table entry
    
    cnv:
      mov  #0x1e, r1
      mova  table, r0
    loop:
      mov.b  @r4+, r2
      ! @(r0, Rm) adds Rm to r0 without scaling and the table entries are
      ! 2 bytes wide, so both indices must be nybble * 2. Doubling the byte
      ! first lets one mask (0x1e) serve for both nybbles; the sign-extension
      ! bits from mov.b are discarded by the mask.
      add  r2, r2
      mov  r2, r3
      shlr2  r3
      shlr2  r3
      and  r1, r2
      and  r1, r3
      mov.w  @(r0, r2), r2
      mov.w  @(r0, r3), r3
      ! Combine: r2 = (high entry << 16) | low entry.
      shll16  r2
      xtrct  r3, r2
      mov.l  r2, @r6
      dt  r5
      bf/s  loop
      add  #4, r6
      rts
      nop
    
      ! mova needs a longword-aligned target.
      .align 2
    table:
      .word 0x0000
      .word 0x0001
      .word 0x0010
      .word 0x0011
      .word 0x0100
      .word 0x0101
      .word 0x0110
      .word 0x0111
      .word 0x1000
      .word 0x1001
      .word 0x1010
      .word 0x1011
      .word 0x1100
      .word 0x1101
      .word 0x1110
      .word 0x1111
    
     

Share This Page