Skip to content

Commit

Permalink
Added more functions in spec
Browse files Browse the repository at this point in the history
  • Loading branch information
Gustavo2622 committed Nov 27, 2023
1 parent b6ee601 commit dc91d37
Showing 1 changed file with 49 additions and 3 deletions.
52 changes: 49 additions & 3 deletions libs/lospecs/examples/spec.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,19 @@ VPBROADCAST_16u16(w1@256) -> @256 =
VPMADDUBSW_256(w1@256, w2@256) -> @256 =

# REF: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#techs=MMX,SSE_ALL,AVX_ALL&ig_expand=324,324,101,6663,4774&text=vpmulh
# mu
VPMULH_16u16(w1@256, w2@256) -> @256 =
map<16, 16>(
fun x@16 y@16 . mult<16>(x, y),
fun x@16 y@16 . mult<16>(x, y)[,
w1,
w2
)

# Need to implement sign extend (intrinsic?)
# TODO: Check indexing here
# rshift<n>(x,y): right shift logical x by y (x >> y) as n-bit ints
# sext<n>(x): sign extend x to n bits
# Can be implemented differently from this
VPMULHRS_16u16(w1@256, w2@256) -> @256 =
map<16, 16>(
fun x@16 y@16 . add<32>(
Expand All @@ -59,9 +63,51 @@ VPMULHRS_16u16(w1@256, w2@256) -> @256 =
w1,
w2)

# REF: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#techs=MMX,SSE_ALL,AVX_ALL&ig_expand=324,324,101,6663,4774,4785,6371&text=vpsra
# TODO?: Might need to be expanded here
# to match the intrinsics guide pseudocode at this level
# shift_right_ar(x,y): shift right arithmetic, x >> y
# Takes two 256-bit inputs and yields a 256-bit result by passing
# shift_right_ar<16> together with w1 and w2 to map<16, 16>.
# NOTE(review): presumably map<16, 16> applies the function element-wise
# over 16-bit chunks of w1 and w2 -- confirm against the map combinator.
VPSRA_16u16(w1@256, w2@256) -> @256 =
map<16, 16>(
fun x@16 y@16 . shift_right_ar<16>(x, y),
w1,
w2
)


# SatToSW<n>: SaturateToSignedWord<bit_len>
# For each (x, y) pair handed over by map<16, 16>: multiplies the [0:7]
# slices of x and y with mult<8>, multiplies the [8:15] slices likewise,
# sums the two products with add<16>, and passes the sum to SatToSW<16>.
# NOTE(review): the Intel intrinsics guide describes this instruction as
# unsigned bytes of the first operand times signed bytes of the second,
# with 16-bit intermediate products -- confirm that mult<8> models that
# signedness and result width, and that [0:7] / [8:15] select the intended
# bytes (slice bounds and bit order are not defined in this file).
VPMADDUBSW_256(w1@256, w2@256) -> @256 =
map<16, 16>(
fun x@16 y@16 . SatToSW<16>(
add<16>(
mult<8>(x[0:7], y[0:7]),
mult<8>(x[8:15], y[8:15])
)
),
w1,
w2
)



# REF: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#techs=MMX,SSE_ALL,AVX_ALL&ig_expand=324,324,101,6663,4785,4906&text=vpackus
# Might need different implementation
# probably needs some kind of fold/accumulation combinator
# TODO: FINISH THIS
# NOTE(review): this definition is unfinished and is not expected to parse
# as written: or<128>( is opened with no arguments, the parentheses are
# unbalanced (two opened, one closed), and the argument list ends with a
# trailing comma after w2.
VPACKUS_16u16(w1@256, w2@256) -> @256 =
map<128, 2>(
fun v1@128 v2@128 . or<128>(

w1,
w2,
)


## TODO:
## VPACKUS_16u16
## VPBROADCAST_16u16 *
## VPMADDUBSW_256
## VPMADDUBSW_256 *
## VPMULHRS_16u16 *
## VPSRA_16u16
## VPSRA_16u16 *
## Check for mult bit handling consistency
## Check for bit addressing order consistency (endianness)

0 comments on commit dc91d37

Please sign in to comment.