LLM Training API
Complete API reference for LLM training.
LLMTrainingParams
The main configuration class for LLM training.
from autotrain.trainers.clm.params import LLMTrainingParams
Basic Parameters
params = LLMTrainingParams(
    # Core parameters (always specify these)
    model="google/gemma-3-270m",  # Default: "google/gemma-3-270m"
    data_path="./data.jsonl",     # Default: "data"
    project_name="my-model",      # Default: "project-name"
    # Data splits
    train_split="train",          # Default: "train"
    valid_split=None,             # Default: None
    max_samples=None,             # Default: None (use all)
)
Trainer Selection
params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",
    trainer="sft",  # Default: "default" (pretraining). Options: sft, dpo, orpo, ppo, grpo, reward
)
Training Hyperparameters
params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",
    # Core hyperparameters (showing defaults)
    epochs=1,                 # Default: 1
    batch_size=2,             # Default: 2
    lr=3e-5,                  # Default: 3e-5
    warmup_ratio=0.1,         # Default: 0.1
    gradient_accumulation=4,  # Default: 4
    weight_decay=0.0,         # Default: 0.0
    max_grad_norm=1.0,        # Default: 1.0
    # Precision
    mixed_precision=None,     # Default: None (options: bf16, fp16, None)
    # Optimization
    optimizer="adamw_torch",  # Default: adamw_torch
    scheduler="linear",       # Default: linear
    seed=42,                  # Default: 42
)
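With these defaults, the effective batch size per optimizer step is batch_size × gradient_accumulation (times the number of GPUs under data parallelism). A quick sanity check:
# Effective batch size under the defaults above (single GPU assumed)
batch_size = 2
gradient_accumulation = 4
effective_batch = batch_size * gradient_accumulation
print(effective_batch)  # 8 sequences per optimizer step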
PEFT/LoRA Configuration
params = LLMTrainingParams(
    model="meta-llama/Llama-3.2-1B",
    data_path="./data.jsonl",
    project_name="my-model",
    # Enable LoRA (default: False)
    peft=True,
    lora_r=16,                    # Default: 16
    lora_alpha=32,                # Default: 32
    lora_dropout=0.05,            # Default: 0.05
    target_modules="all-linear",  # Default: all-linear
    # Quantization (optional)
    quantization="int4",          # Options: int4, int8, or None (default: None)
    # Merge after training (default is True - LoRA merged automatically)
    merge_adapter=True,
)
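Because merge_adapter=True folds the LoRA weights back into the base model, the finished checkpoint loads like any ordinary model. A minimal sketch, assuming the output lands in the project_name directory (the usual convention, but verify for your setup):
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the merged model from the training output directory
model = AutoModelForCausalLM.from_pretrained("my-model")
tokenizer = AutoTokenizer.from_pretrained("my-model")
Combining peft=True with quantization="int4" gives QLoRA-style fine-tuning, which trades some speed for a much smaller memory footprint.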
Data Processing
params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",
    # Text processing
    text_column="text",
    block_size=-1,                # Default: -1 (model default)
    model_max_length=2048,        # Auto-detected from model config (see note below)
    add_eos_token=True,           # Default: True
    padding="right",              # Default: "right"
    # Chat format
    chat_template=None,           # Auto-detect or specify
    apply_chat_template=True,     # Default: True
    # Efficiency
    packing=None,                 # Default: None (set True to enable)
    use_flash_attention_2=False,  # Default: False
    attn_implementation=None,     # Default: None
)
model_max_length Auto-Detection: This parameter is now auto-detected from the model’s config. For example, Gemma 2 (8192 tokens) and Gemma 3 (32K-128K tokens depending on variant) will automatically use their native context lengths. The default 2048 is only used as a fallback when auto-detection fails. Set explicitly to override.
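For example, to cap sequences below a model's native context window (values here are illustrative):
params = LLMTrainingParams(
    model="google/gemma-2-2b",
    data_path="./data.jsonl",
    project_name="my-model",
    model_max_length=4096,  # overrides the auto-detected 8192 for Gemma 2
)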
DPO Parameters
params = LLMTrainingParams(
    model="meta-llama/Llama-3.2-1B",
    data_path="./preferences.jsonl",
    project_name="my-model",
    trainer="dpo",
    # DPO-specific
    dpo_beta=0.1,                # Default: 0.1
    max_prompt_length=128,       # Default: 128
    max_completion_length=None,  # Default: None
    # Reference model (optional)
    model_ref=None,              # Uses same as model if None
    # Data columns (required for DPO)
    prompt_text_column="prompt",
    text_column="chosen",
    rejected_text_column="rejected",
)
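With the column names above, each line of preferences.jsonl is one prompt paired with a preferred and a rejected response. A sketch of a single record (contents are placeholders):
import json

record = {
    "prompt": "What is the capital of France?",   # prompt_text_column
    "chosen": "The capital of France is Paris.",  # text_column
    "rejected": "France has no capital city.",    # rejected_text_column
}
print(json.dumps(record))  # one JSON object per line in the .jsonl file
The ORPO trainer below consumes the same columns.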
ORPO Parameters
params = LLMTrainingParams(
    model="google/gemma-2-2b",
    data_path="./preferences.jsonl",
    project_name="my-model",
    trainer="orpo",
    # ORPO-specific
    dpo_beta=0.1,                # Default: 0.1
    max_prompt_length=128,       # Default: 128
    max_completion_length=None,  # Default: None
    # Data columns (required for ORPO)
    prompt_text_column="prompt",
    text_column="chosen",
    rejected_text_column="rejected",
)
GRPO Parameters
params = LLMTrainingParams(
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    project_name="grpo-agent",
    trainer="grpo",
    # GRPO-specific (required)
    rl_env_module="my_envs.hotel_env",  # Python module path for the environment
    rl_env_class="HotelEnv",            # Class name in the environment module
    rl_num_generations=4,               # Default: 4 — completions per prompt
    # Shared RL parameters (used by both PPO and GRPO)
    rl_kl_coef=0.1,                     # Default: 0.1 — KL divergence penalty (beta)
    rl_clip_range=0.2,                  # Default: 0.2 — clipping range (epsilon)
    rl_env_config=None,                 # Default: None — JSON config for environment constructor
    rl_max_new_tokens=256,              # Default: 128 — max tokens per completion (raised here)
    rl_top_k=50,                        # Default: 50
    rl_top_p=1.0,                       # Default: 1.0
    rl_temperature=1.0,                 # Default: 1.0
    # vLLM acceleration (optional)
    use_vllm=False,                     # Default: False — enable vLLM for faster generation
    vllm_mode="colocate",               # Default: "colocate" — or "server"
    vllm_gpu_memory_utilization=0.3,    # Default: 0.3 — GPU memory fraction for vLLM (colocate)
    vllm_server_url=None,               # Default: None — vLLM server URL (server mode)
    vllm_tensor_parallel_size=1,        # Default: 1 — GPUs for vLLM tensor parallelism
    vllm_server_gpus=1,                 # Default: 1 — GPUs reserved for vLLM server
)
GRPO does not require data_path: the dataset is built by the environment's build_dataset() method. For vLLM support, run pip install aitraining[vllm]. See GRPO Training for the full environment interface (build_dataset, score_episode, get_tools).
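To make the interface concrete, here is a hypothetical environment matching the rl_env_module and rl_env_class settings above. The method names come from this page; their exact signatures are assumptions, so consult the GRPO Training page for the real interface:
# my_envs/hotel_env.py (a hypothetical sketch; signatures are assumptions)
class HotelEnv:
    def build_dataset(self):
        # Produce the prompts GRPO samples completions for.
        return [{"prompt": "Find a hotel in Lisbon under $150/night."}]

    def score_episode(self, episode):
        # Return a scalar reward for one completed episode; GRPO compares
        # rewards within each group of rl_num_generations completions.
        return 1.0 if "hotel" in str(episode).lower() else 0.0

    def get_tools(self):
        # Tools exposed to the agent during rollouts (none in this sketch).
        return []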
Knowledge Distillation
params = LLMTrainingParams(
    model="google/gemma-3-270m",        # Student
    teacher_model="google/gemma-2-2b",  # Teacher
    data_path="./prompts.jsonl",
    project_name="distilled-model",
    use_distillation=True,
    distill_temperature=3.0,         # Default: 3.0
    distill_alpha=0.7,               # Default: 0.7
    distill_max_teacher_length=512,  # Default: 512
)
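For intuition: the temperature softens both logit distributions, and alpha blends the soft teacher loss with the ordinary cross-entropy loss. A sketch of the textbook soft-target formulation (shown for illustration; not necessarily this library's exact implementation):
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels, T=3.0, alpha=0.7):
    # Expects (N, vocab) logits and (N,) integer labels.
    # Soft targets: KL between temperature-scaled distributions, rescaled by T^2.
    soft = F.kl_div(
        F.log_softmax(student_logits / T, dim=-1),
        F.softmax(teacher_logits / T, dim=-1),
        reduction="batchmean",
    ) * (T * T)
    # Hard targets: ordinary cross-entropy against the labels.
    hard = F.cross_entropy(student_logits, labels)
    return alpha * soft + (1 - alpha) * hard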
Logging & Saving
params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",
    # Logging
    log="wandb",            # wandb, tensorboard, or None (default: wandb)
    logging_steps=-1,       # Default: -1 (auto)
    wandb_visualizer=True,  # Terminal visualizer
    wandb_token=None,       # W&B API token (optional)
    # Checkpointing
    save_strategy="steps",  # steps or epoch (default: epoch)
    save_steps=500,
    save_total_limit=1,     # Default: 1
    eval_strategy="steps",
)
Hub Integration
params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",
    # Push to Hub
    push_to_hub=True,
    username="your-username",
    token="hf_...",
)
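To keep credentials out of source control, read the token from the environment instead of hardcoding it (HF_TOKEN is a conventional variable name, not one this API requires):
import os

params = LLMTrainingParams(
    model="google/gemma-3-270m",
    data_path="./data.jsonl",
    project_name="my-model",
    push_to_hub=True,
    username="your-username",
    token=os.environ["HF_TOKEN"],  # export HF_TOKEN=hf_... beforehand
)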
Running Training
from autotrain.project import AutoTrainProject
# Create and run project
project = AutoTrainProject(
    params=params,
    backend="local",
    process=True
)
job_id = project.create()
Complete Example
from autotrain.trainers.clm.params import LLMTrainingParams
from autotrain.project import AutoTrainProject
# Full configuration
params = LLMTrainingParams(
    # Model
    model="meta-llama/Llama-3.2-1B",
    project_name="llama-production",
    # Data
    data_path="./conversations.jsonl",
    train_split="train",
    valid_split="validation",
    text_column="text",
    block_size=2048,
    # Training
    trainer="sft",
    epochs=3,
    batch_size=2,
    gradient_accumulation=8,
    lr=2e-5,
    warmup_ratio=0.1,
    mixed_precision="bf16",
    # LoRA
    peft=True,
    lora_r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    # Optimization
    use_flash_attention_2=True,
    packing=True,
    auto_find_batch_size=True,
    unsloth=False,  # Set True to use Unsloth for faster training
    # Distribution (for multi-GPU)
    distributed_backend=None,  # None for auto (DDP), or "deepspeed"
    # Logging
    log="wandb",
    logging_steps=-1,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=1,
    # Hub
    push_to_hub=True,
    username="my-username",
    token="hf_...",
)
# Run training
project = AutoTrainProject(
    params=params,
    backend="local",
    process=True
)
job_id = project.create()
Next Steps
Python SDK: Full SDK reference
DPO Training: DPO deep dive