Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
fix device name
  • Loading branch information
SunMarc committed Oct 9, 2025
commit 3b453005e9f4dd52fcd189006cff875e2789b0e4
6 changes: 3 additions & 3 deletions src/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
@@ -770,10 +770,10 @@ def _load_state_dict_into_meta_model(
if value.device.type == "meta":
continue
val_kwargs = value.__dict__
- if value.dtype in [torch.uint8, torch.int8]:
+ if not value.is_floating_point():
val_kwargs["requires_grad"] = False
- param_to = "meta" if is_fsdp_enabled() and not is_local_dist_rank_0() else "cpu"
- value = type(value)(value.data.to(param_to), **val_kwargs)
+ device = "meta" if is_fsdp_enabled() and not is_local_dist_rank_0() else "cpu"
+ value = type(value)(value.data.to(device), **val_kwargs)
setattr(module, param_type, value)

# Remove the param from the state dict if it was not loaded on the fly to avoid wasting memory
Expand Down