Hugging Face Transformers

Hugging Face Transformers is the dominant library for working with large language models in Python. It provides a unified API for over 400,000 models on the Hugging Face Hub — BERT, GPT-2, LLaMA, Mistral, Falcon, BLOOM, Whisper, and more.

Installation

pip install transformers   # the library itself
pip install torch          # or tensorflow / jax
pip install accelerate     # for large model loading
pip install datasets       # for training data

Pipelines — Fastest Way to Get Results

The pipeline() function wraps a model and tokenizer into a one-line inference call:

from transformers import pipeline

# Sentiment analysis: build the pipeline once, then call it like a function
sentiment = pipeline(task="sentiment-analysis")
review = "The new Claude model is impressively capable and fast."
result = sentiment(review)
print(result)  # e.g. [{'label': 'POSITIVE', 'score': 0.9997}]

# Named entity recognition: one row per aggregated entity (word, group, score)
ner = pipeline("ner", aggregation_strategy="simple")
sentence = "Satya Nadella, CEO of Microsoft, spoke at their Redmond campus."
for span in ner(sentence):
    word, group, score = span['word'], span['entity_group'], span['score']
    print(f"{word:<20} {group:<8} {score:.3f}")

# Text generation
generator = pipeline("text-generation", model="gpt2")
output = generator(
    "Natural language processing enables",
    # Bound the number of *generated* tokens; unlike max_length this does not
    # count the prompt and cannot be overridden by a default max_new_tokens.
    max_new_tokens=50,
    # Returning more than one sequence needs sampling (or beam search), so
    # request it explicitly instead of relying on model/pipeline defaults.
    do_sample=True,
    num_return_sequences=2,
)
for seq in output:
    print(seq['generated_text'])

# Question answering: the answer is extracted as a span of the given context
qa = pipeline("question-answering")
question = "What year was BERT released?"
context = "BERT was introduced by Google researchers in 2018. It uses bidirectional attention."
result = qa(question=question, context=context)
print(result)  # {'answer': '2018', 'score': 0.994}

# Zero-shot classification: score arbitrary candidate labels without fine-tuning
classifier = pipeline("zero-shot-classification")
text = "The Federal Reserve raised interest rates by 25 basis points."
labels = ["finance", "sports", "technology", "politics"]
result = classifier(text, candidate_labels=labels)
score_by_label = {
    label: round(score, 3)
    for label, score in zip(result['labels'], result['scores'])
}
print(score_by_label)

Loading Models and Tokenizers

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

texts = [
    "This transformer model achieves excellent results!",
    "The training took way too long and results were poor."
]

# Encode both sentences as one padded batch of PyTorch tensors
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

# Inference only: skip gradient tracking
with torch.no_grad():
    logits = model(**inputs).logits

probs = torch.softmax(logits, dim=-1)
labels = model.config.id2label

# Per-row best probability and its class index
confidences, class_ids = probs.max(dim=-1)
for text, confidence, class_id in zip(texts, confidences.tolist(), class_ids.tolist()):
    print(f"[{labels[class_id]} {confidence:.3f}] {text}")

Fine-Tuning for Text Classification

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import numpy as np

# Sample dataset as (text, label) pairs; label 0=tech, 1=food
samples = [
    ("Server CPU usage spiked to 98% during peak traffic.", 0),
    ("The sourdough bread turned out perfectly golden.", 1),
    ("CUDA out of memory error when training on large batches.", 0),
    ("Fresh herbs from the garden make pasta taste amazing.", 1),
    ("The API rate limit was exceeded after 60 requests.", 0),
    ("Homemade pizza dough needs at least 2 hours to rise.", 1),
]
# Dataset.from_dict expects column-oriented data, so split the pairs into columns
data = {
    "text": [sentence for sentence, _ in samples],
    "label": [category for _, category in samples],
}
dataset = Dataset.from_dict(data)

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(examples):
    """Tokenize the "text" column of a batch, padded/truncated to 128 tokens."""
    return tokenizer(
        examples["text"],
        max_length=128,
        padding="max_length",
        truncation=True,
    )

# Tokenize in batches, then hold out 20% of the rows as the "test" split
tokenized = dataset.map(tokenize, batched=True).train_test_split(test_size=0.2)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Current name of this option; it was called `evaluation_strategy` in
    # older transformers releases, and that spelling is no longer accepted.
    eval_strategy="epoch",
    # Checkpoint on the same schedule as evaluation so the best checkpoint
    # can be restored by load_best_model_at_end.
    save_strategy="epoch",
    load_best_model_at_end=True,
    report_to="none"  # disable external experiment-tracking integrations
)

def compute_metrics(eval_pred):
    """Return {"accuracy": ...} for a (logits, labels) evaluation pair."""
    logits, labels = eval_pred
    # Predicted class = index of the largest logit in each row
    hits = np.equal(np.argmax(logits, axis=-1), labels)
    return {"accuracy": hits.mean()}

# Pick out the two splits, then hand everything to the Trainer
train_split = tokenized["train"]
eval_split = tokenized["test"]

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
    compute_metrics=compute_metrics,
)

# Run the fine-tuning loop
trainer.train()

Token Classification (NER)

from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import torch  # needed for the torch.cuda.is_available() device check below

# Run on the first CUDA GPU (device 0) when one is available, otherwise on CPU (-1)
ner_pipeline = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
    device=0 if torch.cuda.is_available() else -1
)

text = "In Q2 2025, Nvidia's revenue reached $44.1 billion, driven by data center demand."
results = ner_pipeline(text)
# One row per aggregated entity: surface text, entity group, confidence
for entity in results:
    print(f"{entity['word']:<20} [{entity['entity_group']}] {entity['score']:.3f}")

Working with LLMs for Generation

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # requires HF access token
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"  # automatically distributes across GPUs
)

messages = [
    {"role": "user", "content": "Explain BERT's bidirectional attention in 2 sentences."}
]

# Render the conversation with the model's chat template.
# - add_generation_prompt=True appends the assistant-turn marker for templates
#   that define one, so the model answers instead of continuing the user turn.
# - return_dict=True returns input_ids together with attention_mask, so
#   generate() does not have to guess which positions are padding.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
prompt_length = inputs["input_ids"].shape[-1]

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # Pad with EOS explicitly, for tokenizers that define no pad token.
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns prompt + completion; keep only the newly generated tokens
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print(response)

Useful Hub Models by Task (2025)

Task	Model	Notes
Sentiment	`distilbert-base-uncased-finetuned-sst-2-english`	Fast, English only
NER	`dslim/bert-base-NER`	CoNLL-2003 trained
QA	`deepset/roberta-base-squad2`	Extractive QA
Summarization	`facebook/bart-large-cnn`	News summarization
Translation	`Helsinki-NLP/opus-mt-*`	1000+ language pairs
Text classification	Fine-tune DistilBERT	Fastest fine-tuning
Sentence similarity	`all-MiniLM-L6-v2`	Via sentence-transformers
Generation	`mistralai/Mistral-7B-Instruct-v0.2`	Open-source, 7B params