Fine-tuning LLMs on AMD GPUs with Unsloth
Learn how to fine-tune large language models (LLMs) on AMD GPUs with Unsloth.
3. Install PyTorch
uv pip install "torch>=2.4,<2.11.0" "torchvision<0.26.0" "torchaudio<2.11.0" \
--index-url https://download.pytorch.org/whl/rocm7.1 --upgrade --force-reinstall
ROCM_TAG="$({ command -v amd-smi >/dev/null 2>&1 && amd-smi version 2>/dev/null | awk -F'ROCm version: ' 'NF>1{split($2,a,"."); print "rocm"a[1]"."a[2]; ok=1; exit} END{exit !ok}'; } || { [ -r /opt/rocm/.info/version ] && awk -F. '{print "rocm"$1"."$2; exit}' /opt/rocm/.info/version; } || { command -v hipconfig >/dev/null 2>&1 && hipconfig --version 2>/dev/null | awk -F': *' '/HIP version/{split($2,a,"."); print "rocm"a[1]"."a[2]; ok=1; exit} END{exit !ok}'; } || { command -v dpkg-query >/dev/null 2>&1 && ver="$(dpkg-query -W -f="${Version}\n" rocm-core 2>/dev/null)" && [ -n "$ver" ] && awk -F'[.-]' '{print "rocm"$1"."$2; exit}' <<<"$ver"; } || { command -v rpm >/dev/null 2>&1 && ver="$(rpm -q --qf '%{VERSION}\n' rocm-core 2>/dev/null)" && [ -n "$ver" ] && awk -F'[.-]' '{print "rocm"$1"."$2; exit}' <<<"$ver"; })"; [ -n "$ROCM_TAG" ] && uv pip install "torch>=2.4,<2.11.0" "torchvision<0.26.0" "torchaudio<2.11.0" --index-url "https://download.pytorch.org/whl/$ROCM_TAG" --upgrade --force-reinstall
4. Install Unsloth
uv pip install "unsloth[amd]"
# x86_64 systems:
pip install --force-reinstall --no-cache-dir --no-deps \
"https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl"
# aarch64 systems: replace x86_64 with aarch64 in the URL above
# Fallback if the URL is unreachable:
# pip install --force-reinstall --no-cache-dir --no-deps "bitsandbytes>=0.49.1"
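To verify the install, a quick import check (a sanity check only; it assumes the commands above succeeded, and uses the standard-library importlib.metadata to read the installed versions):

import importlib.metadata as md

import unsloth       # noqa: F401  (importing surfaces backend problems early)
import bitsandbytes  # noqa: F401

print(md.version("unsloth"))       # installed Unsloth version
print(md.version("bitsandbytes"))  # installed bitsandbytes version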
5. Start fine-tuning with Unsloth!
export HSA_OVERRIDE_GFX_VERSION=9.4.2  # Required for AMD MI300X
export HF_HUB_DISABLE_XET=1            # Fixes HuggingFace download issues on AMD

Then, in Python, load the model:

from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-4-26b-a4b-it",
    max_seq_length = 2048,
    load_in_4bit = True,
)
model = FastModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"],
)

from trl import SFTTrainer, SFTConfig
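The trainer below expects `dataset` and `formatting_func` to already exist. A minimal sketch, assuming an Alpaca-style instruction dataset and a per-example formatter (as recent TRL versions accept); the dataset name and prompt template here are illustrative, not from this guide:

from datasets import load_dataset

dataset = load_dataset("yahma/alpaca-cleaned", split = "train")

def formatting_func(example):
    # Turn one instruction/response record into a single training string.
    return (
        f"### Instruction:\n{example['instruction']}\n\n"
        f"### Response:\n{example['output']}"
    )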
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    formatting_func = formatting_func,
    args = SFTConfig(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,
        max_steps = 60,
        output_dir = "outputs",
        report_to = "none",
    ),
)
trainer_stats = trainer.train()
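Once training finishes, you will usually want to keep the LoRA weights. A minimal sketch (the "lora_model" directory name is an arbitrary choice, not from this guide):

model.save_pretrained("lora_model")      # saves only the LoRA adapter weights
tokenizer.save_pretrained("lora_model")  # keep the tokenizer alongside them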
See also:
🔢 Reinforcement Learning on AMD GPUs
📚 AMD Free One-click notebooks