Demo entry 6647450

Matrix dot product

   

Submitted by Kangcheng Xu on Oct 20, 2017 at 05:25
Language: GAS. Code size: 2.5 kB.

# void matrix_prod(void *A, void *B, void *C, int n);
#   - %rdi: pointer to matrix A
#   - %rsi: pointer to matrix B
#   - %rdx: pointer to matrix C (result matrix)
#   - %ecx: square matrix dimension (n)

# long dot_prod(void *A, void *B, int n, int i, int j);
#   - %rdi: base address of matrix A
#   - %rsi: base address of matrix B
#   - %edx: n, dimension of the matrix
#   - %ecx: i-th row
#   - %r8d: j-th col
#   - %rax: 64-bit return value

# unsigned char mod(long x, unsigned char m);
#   - %rdi: original
#   - %sil: mod
#   - %al: 8-bit return value (remainder)

# Algorithm
# for i from 0 to n-1 do
#   for j from 0 to n-1 do
#     C[i][j] = mod(dot_prod(A, B, n, i, j), 17)

  .globl matrix_prod

#-----------------------------------------------------------------------
# void matrix_prod(void *A, void *B, void *C, int n);
# ABI:    System V AMD64, AT&T syntax
# In:     %rdi = A, %rsi = B, %rdx = C (result, one byte per element),
#         %ecx = n (matrix dimension; n <= 0 stores nothing)
# Effect: for every 0 <= i, j < n:
#           C[i*n + j] = mod(dot_prod(A, B, n, i, j), 17)
# Clobb:  caller-saved registers only (plus whatever the callees clobber)
#
# Loop state lives in callee-saved registers so it survives the calls
# without a save/restore on every iteration:
#   %rbx = A    %rbp = B    %r12 = C
#   %r13d = n   %r14d = i (row)   %r15d = j (col)
#-----------------------------------------------------------------------
  .equ MATRIX_PROD_MODULUS, 17

matrix_prod:
  pushq %rbx
  pushq %rbp
  pushq %r12
  pushq %r13
  pushq %r14
  pushq %r15
  subq  $8, %rsp                # entry rsp%16 == 8, six pushes keep it at 8;
                                # pad so every call below is 16-byte aligned

  movq  %rdi, %rbx              # rbx = A
  movq  %rsi, %rbp              # rbp = B
  movq  %rdx, %r12              # r12 = C
  movl  %ecx, %r13d             # r13d = n
  xorl  %r14d, %r14d            # i = 0

.Lmatrix_prod_row_check:        # while (i < n), signed compare
  cmpl  %r13d, %r14d            # i - n ? 0
  jge   .Lmatrix_prod_done      # i >= n: all rows written
  xorl  %r15d, %r15d            # j = 0 at the start of each row

.Lmatrix_prod_col_check:        # while (j < n), signed compare
  cmpl  %r13d, %r15d            # j - n ? 0
  jge   .Lmatrix_prod_next_row  # j >= n: row finished

  # rax = dot_prod(A, B, n, i, j)
  movq  %rbx, %rdi              # 1st arg: A
  movq  %rbp, %rsi              # 2nd arg: B
  movl  %r13d, %edx             # 3rd arg: n
  movl  %r14d, %ecx             # 4th arg: i
  movl  %r15d, %r8d             # 5th arg: j
  call  dot_prod

  # al = mod(rax, 17)
  movq  %rax, %rdi              # 1st arg: the 64-bit dot product
  movl  $MATRIX_PROD_MODULUS, %esi  # 2nd arg: write all of %esi so no stale
                                    # bits remain above the 8-bit modulus
  call  mod

  # C[i*n + j] = al; index math is 64-bit so i*n cannot wrap at 2^32
  movl  %r14d, %ecx             # rcx = i (zero-extended, i >= 0 here)
  movl  %r13d, %edx             # rdx = n (zero-extended, n > 0 here)
  imulq %rdx, %rcx              # rcx = i * n, row offset in bytes
  addq  %r12, %rcx              # rcx = &C[i*n]
  movl  %r15d, %edx             # rdx = j (zero-extended)
  movb  %al, (%rcx,%rdx)        # store the remainder byte

  incl  %r15d                   # j++
  jmp   .Lmatrix_prod_col_check

.Lmatrix_prod_next_row:
  incl  %r14d                   # i++
  jmp   .Lmatrix_prod_row_check

.Lmatrix_prod_done:
  addq  $8, %rsp                # drop the alignment padding
  popq  %r15
  popq  %r14
  popq  %r13
  popq  %r12
  popq  %rbp
  popq  %rbx
  ret

This snippet took 0.00 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).