Hugging Face Transformers
Hugging Face Transformers is the dominant library for working with large language models in Python. It provides a unified API for over 400,000 models on the Hugging Face Hub — BERT, GPT-2, LLaMA, Mistral, Falcon, BLOOM, Whisper, and more.
Installation
pip install transformers
pip install torch        # or tensorflow / jax
pip install accelerate   # for large model loading
pip install datasets     # for training data

Pipelines — Fastest Way to Get Results
The pipeline() function wraps a model and tokenizer into a one-line inference call:
from transformers import pipeline
# Sentiment analysissentiment = pipeline("sentiment-analysis")result = sentiment("The new Claude model is impressively capable and fast.")print(result) # [{'label': 'POSITIVE', 'score': 0.9997}]
# Named entity recognitionner = pipeline("ner", aggregation_strategy="simple")entities = ner("Satya Nadella, CEO of Microsoft, spoke at their Redmond campus.")for ent in entities: print(f"{ent['word']:<20} {ent['entity_group']:<8} {ent['score']:.3f}")
# Text generationgenerator = pipeline("text-generation", model="gpt2")output = generator("Natural language processing enables", max_length=50, num_return_sequences=2)for seq in output: print(seq['generated_text'])
# Question answeringqa = pipeline("question-answering")result = qa( question="What year was BERT released?", context="BERT was introduced by Google researchers in 2018. It uses bidirectional attention.")print(result) # {'answer': '2018', 'score': 0.994}
# Zero-shot classificationclassifier = pipeline("zero-shot-classification")text = "The Federal Reserve raised interest rates by 25 basis points."labels = ["finance", "sports", "technology", "politics"]result = classifier(text, candidate_labels=labels)print(dict(zip(result['labels'], [round(s, 3) for s in result['scores']])))Loading Models and Tokenizers
from transformers import AutoTokenizer, AutoModelForSequenceClassificationimport torch
model_name = "distilbert-base-uncased-finetuned-sst-2-english"tokenizer = AutoTokenizer.from_pretrained(model_name)model = AutoModelForSequenceClassification.from_pretrained(model_name)
texts = [ "This transformer model achieves excellent results!", "The training took way too long and results were poor."]
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
with torch.no_grad(): outputs = model(**inputs)
probs = torch.softmax(outputs.logits, dim=-1)labels = model.config.id2label
for i, text in enumerate(texts): pred_label = labels[probs[i].argmax().item()] confidence = probs[i].max().item() print(f"[{pred_label} {confidence:.3f}] {text}")Fine-Tuning for Text Classification
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from datasets import Dataset
import numpy as np

# Sample dataset
data = {
    "text": [
        "Server CPU usage spiked to 98% during peak traffic.",
        "The sourdough bread turned out perfectly golden.",
        "CUDA out of memory error when training on large batches.",
        "Fresh herbs from the garden make pasta taste amazing.",
        "The API rate limit was exceeded after 60 requests.",
        "Homemade pizza dough needs at least 2 hours to rise.",
    ],
    "label": [0, 1, 0, 1, 0, 1],  # 0=tech, 1=food
}
dataset = Dataset.from_dict(data)

# Tokenizer for the base checkpoint that is fine-tuned below.
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
def tokenize(examples):
    """Tokenize the "text" field of a batch with the module-level tokenizer.

    The tokenizer is called with ``truncation=True``,
    ``padding="max_length"`` and ``max_length=128``; its result is returned
    unchanged.
    """
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples["text"], **encode_options)
# Tokenize the whole dataset in batches, then hold out 20% for evaluation.
tokenized = dataset.map(tokenize, batched=True)
tokenized = tokenized.train_test_split(test_size=0.2)

# Two output labels, matching the 0/1 labels in the sample data.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # `eval_strategy` is the current name of this option; the older spelling
    # `evaluation_strategy` is deprecated and rejected by recent releases.
    # On transformers < 4.41, use `evaluation_strategy` instead.
    eval_strategy="epoch",
    # Kept equal to the eval strategy, which load_best_model_at_end requires.
    save_strategy="epoch",
    load_best_model_at_end=True,
    report_to="none",
)
def compute_metrics(eval_pred):
    """Return the accuracy of argmax predictions against the labels.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class is
    the argmax of the logits over the last axis, and accuracy is the mean of
    the element-wise matches with ``labels``.

    Returns:
        A dict with the single key ``"accuracy"``.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return {"accuracy": (predicted == references).mean()}
trainer = Trainer( model=model, args=training_args, train_dataset=tokenized["train"], eval_dataset=tokenized["test"], compute_metrics=compute_metrics)
trainer.train()Token Classification (NER)
from transformers import AutoTokenizer, AutoModelForTokenClassificationfrom transformers import pipeline
ner_pipeline = pipeline( "ner", model="dslim/bert-base-NER", aggregation_strategy="simple", device=0 if torch.cuda.is_available() else -1)
text = "In Q2 2025, Nvidia's revenue reached $44.1 billion, driven by data center demand."results = ner_pipeline(text)for entity in results: print(f"{entity['word']:<20} [{entity['entity_group']}] {entity['score']:.3f}")Working with LLMs for Generation
from transformers import AutoTokenizer, AutoModelForCausalLMimport torch
model_name = "mistralai/Mistral-7B-Instruct-v0.2" # requires HF access tokentokenizer = AutoTokenizer.from_pretrained(model_name)model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16, device_map="auto" # automatically distributes across GPUs)
messages = [ {"role": "user", "content": "Explain BERT's bidirectional attention in 2 sentences."}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
with torch.no_grad(): outputs = model.generate( input_ids, max_new_tokens=150, do_sample=True, temperature=0.7, top_p=0.9 )
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)print(response)Useful Hub Models by Task (2025)
| Task | Model | Notes |
|---|---|---|
| Sentiment | distilbert-base-uncased-finetuned-sst-2-english | Fast, English only |
| NER | dslim/bert-base-NER | CoNLL-2003 trained |
| QA | deepset/roberta-base-squad2 | Extractive QA |
| Summarization | facebook/bart-large-cnn | News summarization |
| Translation | Helsinki-NLP/opus-mt-* | 1000+ language pairs |
| Text classification | Fine-tune DistilBERT | Fastest fine-tuning |
| Sentence similarity | all-MiniLM-L6-v2 | Via sentence-transformers |
| Generation | mistralai/Mistral-7B-Instruct-v0.2 | Open-source, 7B params |