xma.functional.softmax.triton_implementation.forward

softmax_forward_triton_kernel(x_ptr, x_stride, y_ptr, y_stride, logits_multiplier, B, H, BLOCK_SIZE_B: triton.language.constexpr, BLOCK_SIZE_H: triton.language.constexpr)