Commit 1eccde6

convert-hf : support bfloat16 conversion
1 parent bffdaf4 commit 1eccde6

File tree (2 files changed: +39 -15 lines)

  convert-hf-to-gguf.py (+24 -13)
  gguf-py/gguf/gguf_writer.py (+15 -2)

convert-hf-to-gguf.py (+24 -13)
@@ -239,10 +239,7 @@ def write_tensors(self):
                 data: np.ndarray = data  # type hint
                 n_dims = len(data.shape)
                 data_dtype = data.dtype
-
-                # if f32 desired, convert any float16 to float32
-                if self.ftype == 0 and data_dtype == np.float16:
-                    data = data.astype(np.float32)
+                data_qtype: gguf.GGMLQuantizationType | None = None
 
                 # when both are True, f32 should win
                 extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
@@ -254,20 +251,33 @@ def write_tensors(self):
                 # if f16 desired, convert any float32 2-dim weight tensors to float16
                 extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)
 
-                # when both extra_f32 and extra_f16 are False, convert to float32 by default
-                if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
-                    data = data.astype(np.float32)
+                if self.ftype != gguf.GGMLQuantizationType.F32 and extra_f16 and not extra_f32:
+                    if self.ftype == gguf.GGMLQuantizationType.F16:
+                        if data_dtype != np.float16:
+                            data = data.astype(np.float16)
+                        data_qtype = gguf.GGMLQuantizationType.F16
+
+                    elif self.ftype == gguf.GGMLQuantizationType.BF16:
+                        if data_dtype != np.float32:
+                            data = data.astype(np.float32)
+                        data.dtype = np.int32
+                        data = (data >> 16).astype(np.int16)
+                        data_qtype = gguf.GGMLQuantizationType.BF16
+
+                else:  # by default, convert to float32
+                    if data_dtype != np.float32:
+                        data = data.astype(np.float32)
+                    data_qtype = gguf.GGMLQuantizationType.F32
 
-                if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
-                    data = data.astype(np.float16)
+                assert data_qtype is not None
 
                 # reverse shape to make it similar to the internal ggml dimension order
                 shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
 
                 # n_dims is implicit in the shape
-                logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data.dtype}, shape = {shape_str}")
+                logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
 
-                self.gguf_writer.add_tensor(new_name, data)
+                self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
 
     def write(self):
         self.write_tensors()
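
The BF16 branch above is the heart of the change: NumPy has no native bfloat16 dtype, so the converter reinterprets the float32 buffer as int32 in place and keeps only the upper 16 bits of each element, which is exactly the bfloat16 bit pattern (sign, 8 exponent bits, top 7 mantissa bits). Note this truncates the mantissa rather than rounding to nearest-even. A minimal standalone sketch of the same trick; the helper name f32_to_bf16_bits and the sample values are illustrative, not part of the commit:

import numpy as np

def f32_to_bf16_bits(data: np.ndarray) -> np.ndarray:
    # Same steps as the diff: copy to float32, reinterpret the buffer as
    # int32 (legal in place because the item sizes match), then keep the
    # high 16 bits. The arithmetic right shift sign-extends, but the
    # narrowing cast to int16 discards those extra bits again.
    data = data.astype(np.float32)  # astype copies, so the caller's array is untouched
    data.dtype = np.int32
    return (data >> 16).astype(np.int16)

x = np.array([1.0, 3.140625, -2.5], dtype=np.float32)
bits = f32_to_bf16_bits(x)
# Round trip: widen back to int32, shift into the high half, reinterpret.
y = (bits.astype(np.int32) << 16).view(np.float32)
print(y)  # recovers [1.0, 3.140625, -2.5] exactly; these values fit in bf16

Because write_tensors actually runs on lazy tensors, the in-place dtype assignment and the >> operator in this branch are precisely what the new __setattr__ and __rshift__ hooks on LazyTensor (second file below) are there to intercept.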
@@ -2417,8 +2427,8 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16"], default="f16",
-        help="output format - use f32 for float32, f16 for float16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16"], default="f16",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16",
     )
     parser.add_argument(
         "--bigendian", action="store_true",
@@ -2475,6 +2485,7 @@ def main() -> None:
     ftype_map = {
         "f32": gguf.GGMLQuantizationType.F32,
         "f16": gguf.GGMLQuantizationType.F16,
+        "bf16": gguf.GGMLQuantizationType.BF16,
     }
 
     if args.outfile is not None:
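
Together, the parse_args and ftype_map hunks wire the new type end to end: the string passed to --outtype selects the GGMLQuantizationType that write_tensors switches on. A hypothetical invocation (the model directory path is illustrative):

python convert-hf-to-gguf.py ./my-hf-model --outtype bf16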

gguf-py/gguf/gguf_writer.py (+15 -2)
@@ -28,6 +28,7 @@
 logger = logging.getLogger(__name__)
 
 
+# TODO: generalize LazyTorchTensor to make the Numpy equivalent cleaner
 class LazyTensor:
     data: Callable[[], np.ndarray[Any, Any]]
     # to avoid too deep recursion
@@ -38,12 +39,24 @@ class LazyTensor:
     def __init__(self, data: Callable[[], np.ndarray[Any, Any]], *, dtype: type, shape: tuple[int, ...]):
         self.data = data
         self.functions = []
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         self.shape = shape
 
+    def __setattr__(self, __name: str, __value: Any) -> None:
+        if __name == "dtype":
+            def modify_attr(o: object):
+                setattr(o, __name, __value)
+                return o
+            self.functions.append(modify_attr)
+        self.__dict__[__name] = __value
+
+    def __rshift__(self, __n):
+        self.functions.append(lambda n: n >> __n)
+        return self
+
     def astype(self, dtype: type, **kwargs) -> LazyTensor:
         self.functions.append(lambda n: n.astype(dtype, **kwargs))
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         return self
 
     @property
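
The two added dunder methods exist so that the BF16 path in write_tensors works unchanged on lazy tensors: assigning to .dtype or applying >> does not execute immediately but is appended to self.functions for later replay on the real ndarray (direct writes go through __dict__ so that __init__ and astype do not re-trigger the hook). A self-contained sketch of the pattern, assuming the recorded functions are applied in order at materialization time; the class name LazySketch and the materialize method are illustrative, not the real API:

from typing import Any, Callable

import numpy as np

class LazySketch:
    def __init__(self, data: Callable[[], np.ndarray], *, dtype: type):
        # Write through __dict__ so __init__ does not trip __setattr__.
        self.__dict__["data"] = data
        self.__dict__["functions"] = []
        self.__dict__["dtype"] = np.dtype(dtype)

    def __setattr__(self, name: str, value: Any) -> None:
        if name == "dtype":
            # Record the in-place dtype reinterpretation for later replay.
            def modify_attr(o):
                setattr(o, name, value)
                return o
            self.functions.append(modify_attr)
        self.__dict__[name] = value

    def __rshift__(self, n):
        self.functions.append(lambda a: a >> n)
        return self

    def materialize(self) -> np.ndarray:
        arr = self.data()
        for fn in self.functions:  # replay recorded operations in order
            arr = fn(arr)
        return arr

t = LazySketch(lambda: np.ones(4, dtype=np.float32), dtype=np.float32)
t.dtype = np.int32  # recorded, not executed
t = t >> 16         # recorded, not executed
print(t.materialize())  # [16256 16256 16256 16256] -- 0x3f80, the bf16 bits of 1.0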
