Skip to content

vllm_gaudi.extension.scales

DEVICES_SCALE_FACTORS module attribute

DEVICES_SCALE_FACTORS = {'GAUDI2': 4, 'GAUDI3': 1}

EXP_BIAS_SETS module attribute

EXP_BIAS_SETS = {
    ("GAUDI2", float8_e4m3fn): [3, 7, 11, 15],
    ("GAUDI2", float8_e5m2): [15],
    ("GAUDI3", float8_e4m3fn): range(0, 63),
    ("GAUDI3", float8_e5m2): range(0, 63),
}

EXP_WIDTH module attribute

EXP_WIDTH = {
    float32: 8,
    bfloat16: 8,
    float8_e4m3fn: 4,
    float8_e5m2: 5,
}

FP8_143_SCALES module attribute

FP8_143_SCALES = {
    device: (
        get_fp8_hw_alligned_scales(float8_e4m3fn, device)
    )
    for device in DEVICES_SCALE_FACTORS.keys()
}

FP8_143_SCALES_TRAITS module attribute

FP8_143_SCALES_TRAITS = {
    device: (
        min(FP8_143_SCALES[device]),
        max(FP8_143_SCALES[device]),
        DEVICES_SCALE_FACTORS[device],
    )
    for device in DEVICES_SCALE_FACTORS.keys()
}

MAX_RANGE module attribute

MAX_RANGE = {
    float32: torch.finfo(float32).max,
    bfloat16: torch.finfo(bfloat16).max,
    float8_e4m3fn: torch.finfo(float8_e4m3fn).max,
    float8_e5m2: torch.finfo(float8_e5m2).max,
}

is_hpu_gaudi2 module attribute

is_hpu_gaudi2 = _get_device_type() == synDeviceGaudi2

ConvertScaleToHwAligned

Source code in vllm_gaudi/extension/scales.py
class ConvertScaleToHwAligned:
    """Snap a scale tensor onto the device's hardware-supported scale grid.

    NOTE(review): the ``device_type`` constructor argument is ignored —
    the actual hardware detection (``is_hpu_gaudi2``) always wins.
    """

    def __init__(self, device_type="GAUDI3"):
        # Detection overrides the argument: GAUDI2 hardware forces "GAUDI2".
        if is_hpu_gaudi2:
            self.device_type = "GAUDI2"
        else:
            self.device_type = "GAUDI3"

    def calc(self, scale):
        """Return *scale* rounded to a HW-aligned power of two, clamped to the device range."""
        if self.device_type == "GAUDI2":
            # GAUDI2 e4m3 range differs; pre-scale by the fn/fnuz ratio.
            scale = scale * get_hpu_gaudi2_scale_factor()
        pow2_scale = ScaleToPow2().calc(scale)
        lo, hi, factor = FP8_143_SCALES_TRAITS[self.device_type]
        # Round the exponent up to a multiple of the device's scale factor.
        aligned = 2.0**(torch.ceil(torch.log2(pow2_scale) / factor) * factor)
        lower_bound = torch.tensor(lo, dtype=scale.dtype, device=scale.device)
        upper_bound = torch.tensor(hi, dtype=scale.dtype, device=scale.device)
        return torch.minimum(torch.maximum(aligned, lower_bound), upper_bound)

device_type instance attribute

device_type = 'GAUDI2' if is_hpu_gaudi2 else 'GAUDI3'

__init__

__init__(device_type='GAUDI3')
Source code in vllm_gaudi/extension/scales.py
def __init__(self, device_type="GAUDI3"):
    # NOTE(review): the device_type argument is ignored — hardware
    # detection via is_hpu_gaudi2 decides the effective device type.
    if is_hpu_gaudi2:
        self.device_type = "GAUDI2"
    else:
        self.device_type = "GAUDI3"

calc

calc(scale)
Source code in vllm_gaudi/extension/scales.py
def calc(self, scale):
    """Round *scale* to the nearest hardware-aligned power-of-two scale.

    The result is clamped between the device's minimum and maximum
    supported FP8-143 scales.
    """
    if self.device_type == "GAUDI2":
        # GAUDI2 e4m3 range differs; pre-scale by the fn/fnuz ratio.
        scale = scale * get_hpu_gaudi2_scale_factor()
    pow2 = ScaleToPow2().calc(scale)
    lo, hi, factor = FP8_143_SCALES_TRAITS[self.device_type]
    # Round the exponent up to a multiple of the device's scale factor.
    candidate = 2.0**(torch.ceil(torch.log2(pow2) / factor) * factor)
    lower = torch.tensor(lo, dtype=scale.dtype, device=scale.device)
    upper = torch.tensor(hi, dtype=scale.dtype, device=scale.device)
    return torch.minimum(torch.maximum(candidate, lower), upper)

ScaleToPow2

Source code in vllm_gaudi/extension/scales.py
class ScaleToPow2:
    """Round scales up to the nearest power of two."""

    def calc(self, scale):
        """Return the smallest power of two >= *scale*, element-wise."""
        exponent = torch.ceil(torch.log2(scale))
        return 2.0**exponent

calc

calc(scale)
Source code in vllm_gaudi/extension/scales.py
def calc(self, scale):
    """Return the smallest power of two >= *scale*, element-wise."""
    exponent = torch.ceil(torch.log2(scale))
    return 2.0**exponent

get_default_exp_bias

get_default_exp_bias(dtype)
Source code in vllm_gaudi/extension/scales.py
def get_default_exp_bias(dtype):
    """Return the IEEE-style default exponent bias for *dtype*: 2**(w-1) - 1."""
    width = EXP_WIDTH[dtype]
    return (1 << (width - 1)) - 1

get_fp8_hw_alligned_scales

get_fp8_hw_alligned_scales(dtype, device)
Source code in vllm_gaudi/extension/scales.py
def get_fp8_hw_alligned_scales(dtype, device):
    """Return the HW-aligned scales for (device, dtype), or None if unsupported.

    Each entry is the fullscale for one exponent bias in the device's
    EXP_BIAS_SETS entry, normalized by the default-bias fullscale.
    """
    exp_bias_set = EXP_BIAS_SETS.get((device, dtype))
    if exp_bias_set is None:
        return None
    # Hoisted out of the comprehension: the divisor is loop-invariant,
    # but the original re-evaluated get_fullscale() once per element.
    base_fullscale = get_fullscale(dtype, device)
    return [x / base_fullscale for x in get_fullscales_by_expbias_set(dtype, device, exp_bias_set)]

get_fullscale

get_fullscale(dtype, device, exp_bias=None)
Source code in vllm_gaudi/extension/scales.py
def get_fullscale(dtype, device, exp_bias=None):
    """Return the full-scale value of *dtype* on *device* for a given exponent bias.

    The base value is the dtype's maximum from MAX_RANGE, rescaled by
    2**(default_bias - exp_bias). On GAUDI2 with float8_e4m3fn, the *fnuz*
    variant's range is used instead; if this torch build has no
    float8_e4m3fnuz attribute, the base silently falls back to 1.
    """
    default_exp_bias = get_default_exp_bias(dtype)
    fullscale = 1  # fallback when torch lacks float8_e4m3fnuz (see below)
    if device == "GAUDI2" and dtype == torch.float8_e4m3fn:
        try:
            fullscale = MAX_RANGE[torch.float8_e4m3fnuz]
        except AttributeError:
            # Older torch without float8_e4m3fnuz: keep the fallback of 1.
            # (Fixed: the original bound the exception to an unused name.)
            pass
    else:
        fullscale = MAX_RANGE[dtype]
    exp_bias = default_exp_bias if exp_bias is None else exp_bias
    fullscale = fullscale * (2**(default_exp_bias - exp_bias))
    return float(fullscale)

get_fullscales_by_expbias_set

get_fullscales_by_expbias_set(dtype, device, expbias_set)
Source code in vllm_gaudi/extension/scales.py
def get_fullscales_by_expbias_set(dtype, device, expbias_set):
    """Compute the fullscale of *dtype* on *device* for every bias in *expbias_set*."""
    return [get_fullscale(dtype, device, exp_bias=bias) for bias in expbias_set]

get_hpu_gaudi2_scale_factor

get_hpu_gaudi2_scale_factor()
Source code in vllm_gaudi/extension/scales.py
def get_hpu_gaudi2_scale_factor():
    return (torch.finfo(torch.float8_e4m3fn).max / torch.finfo(torch.float8_e4m3fnuz).max)