Format Email Messages Using NLP

NLP can automate email formatting, tone analysis, and content improvement — useful for building writing assistants, customer service tools, and email productivity applications.

Analyzing Email Tone with VADER

Before reformatting an email, measure its current tone — both overall and sentence by sentence — so you know what needs to change:

# NLTK supplies the VADER sentiment analyzer; `re` is used for sentence splitting.
import nltk
# Fetch the VADER lexicon resource used by SentimentIntensityAnalyzer.
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer
import re

# Shared analyzer instance; analyze_email_tone calls sia.polarity_scores().
sia = SentimentIntensityAnalyzer()

def analyze_email_tone(email_text):
    """Score the sentiment of an email as a whole and sentence by sentence.

    The text is split after sentence-ending punctuation (``.``, ``!`` or
    ``?``) followed by whitespace. Fragments of five characters or fewer
    (after stripping) are skipped. Each remaining sentence, and the full
    text, is scored with the module-level VADER analyzer ``sia``. A compound
    score above 0.05 is labelled "positive", below -0.05 "negative", and
    anything in between "neutral".

    Args:
        email_text: Raw email text; may contain hard-wrapped lines.

    Returns:
        A dict with:
          - "overall_tone": label for the whole text,
          - "overall_score": compound score rounded to 3 decimals,
          - "sentences": list of dicts with "text", "sentiment" and "score".
        Each sentence "text" has runs of whitespace (line wraps, blank
        lines) collapsed to single spaces so it reads as one line.
    """
    def label(compound):
        # Single place for the positive/negative/neutral thresholds.
        if compound > 0.05:
            return "positive"
        if compound < -0.05:
            return "negative"
        return "neutral"

    sentence_scores = []
    for sentence in re.split(r'(?<=[.!?])\s+', email_text):
        stripped = sentence.strip()
        if len(stripped) <= 5:
            continue  # too short to be a meaningful sentence

        compound = sia.polarity_scores(sentence)['compound']
        sentence_scores.append({
            # Collapse hard line wraps / blank lines: a sentence that spans
            # wrapped lines would otherwise carry embedded newlines and
            # break one-line-per-sentence displays.
            "text": " ".join(stripped.split()),
            "sentiment": label(compound),
            "score": round(compound, 3),
        })

    overall = sia.polarity_scores(email_text)['compound']
    return {
        "overall_tone": label(overall),
        "overall_score": round(overall, 3),
        "sentences": sentence_scores,
    }

# Sample email that mixes frustration with appreciation, used to exercise the analyzer.
email = """
Hi team,

I'm extremely frustrated with the ongoing delays in the project. This is the third
deadline we've missed and I'm very disappointed. However, I do appreciate the hard
work everyone has put in. Let's find a solution together.

Best regards,
Sarah
"""

# Score the whole email, then report the overall label and compound score.
result = analyze_email_tone(email)
print(f"Overall tone: {result['overall_tone']} (score: {result['overall_score']})")
print("\nSentence-level analysis:")
# One row per sentence: label left-aligned to 9 columns, signed score with
# three decimals, and the first 60 characters of the sentence text.
for s in result['sentences']:
    print(f"  [{s['sentiment']:<9} {s['score']:+.3f}] {s['text'][:60]}")

Extracting Email Components

import re

# Salutation / sign-off keywords. The trailing \b requires the keyword to be a
# whole word, so lines such as "Highlights ..." or "Bestsellers ..." are not
# mistaken for a greeting or a closing.
_GREETING_RE = re.compile(
    r'^(hi|hello|dear|hey|good\s+(morning|afternoon|evening))\b',
    re.IGNORECASE,
)
_CLOSING_RE = re.compile(
    r'^(best|regards|thanks|thank you|sincerely|cheers|kind regards)\b',
    re.IGNORECASE,
)


def parse_email(raw_email):
    """Split a plain-text email into greeting, body, closing and signature.

    Blank lines are discarded and every remaining line is stripped. The first
    line is taken as the greeting if it starts with a salutation keyword. The
    last line that starts with a sign-off keyword is taken as the closing, and
    everything after it is joined with spaces into the signature. Whatever is
    left is the body.

    Limitation: sign-off detection is keyword-based, so when no real sign-off
    follows, a body line such as "Thanks for the update." is treated as the
    closing.

    Args:
        raw_email: The email text.

    Returns:
        A dict with keys "greeting" (str), "body_paragraphs" (list of
        non-blank body lines, in order), "closing" (str) and "signature"
        (str). Components that are not found are empty.
    """
    lines = [line.strip() for line in raw_email.strip().split('\n')]
    lines = [line for line in lines if line]

    components = {
        "greeting": "",
        "body_paragraphs": [],
        "closing": "",
        "signature": "",
    }

    if lines and _GREETING_RE.match(lines[0]):
        components["greeting"] = lines[0]
        lines = lines[1:]

    # Walk backwards so the sign-off nearest the end of the email wins.
    for idx in range(len(lines) - 1, -1, -1):
        if _CLOSING_RE.match(lines[idx]):
            components["closing"] = lines[idx]
            components["signature"] = ' '.join(lines[idx + 1:])
            lines = lines[:idx]
            break

    # Whatever is left between greeting and closing is the body.
    components["body_paragraphs"] = lines
    return components

# Sample email with a greeting, a three-line body, a closing and a signature.
raw = """Dear John,

I wanted to follow up on our meeting last Tuesday.
The project timeline looks good and we're on track.
Please let me know if you have any concerns.

Best regards,
Emily Chen
"""

# Parse the sample and print each extracted component on its own line.
parsed = parse_email(raw)
for key, value in parsed.items():
    print(f"{key}: {value}")

Grammar and Clarity Improvement with LanguageTool

# pip install language-tool-python
import language_tool_python

# Shared checker configured for US English; check_email_grammar calls tool.check().
# NOTE(review): constructing LanguageTool may start a local LanguageTool
# server (which needs Java) — confirm for the installed version.
tool = language_tool_python.LanguageTool('en-US')

def check_email_grammar(text):
    """Run the module-level LanguageTool checker over ``text``.

    Args:
        text: The email text to check.

    Returns:
        A dict with "original" (the input), "corrected" (the text after
        ``language_tool_python.utils.correct`` applies the matches) and
        "issues", a list with one dict per match holding "error" (the
        flagged substring), "message", "suggestion" (first replacement, or
        "No suggestion" when there is none) and "category".
    """
    matches = tool.check(text)

    def describe(match):
        # Slice the flagged span out of the original text.
        start = match.offset
        end = start + match.errorLength
        if match.replacements:
            fix = match.replacements[0]
        else:
            fix = "No suggestion"
        return {
            "error": text[start:end],
            "message": match.message,
            "suggestion": fix,
            # NOTE(review): this stores the rule ID under "category" —
            # confirm that is intended rather than the match's category.
            "category": match.ruleId,
        }

    issues = [describe(m) for m in matches]
    corrected = language_tool_python.utils.correct(text, matches)
    return {"original": text, "corrected": corrected, "issues": issues}

# Sample email with deliberate mistakes (e.g. "the the", "deadline have",
# "you're team", lowercase "john" / "friday") for the checker to flag.
email_text = """
Hi john,

I want to inform you that the the project deadline have been moved to next friday.
Please make sure everyone on you're team are aware of this change.

Best,
Michael
"""

# Check the sample, then show the text before and after correction.
result = check_email_grammar(email_text)
print("Original:")
print(result["original"])
print("\nCorrected:")
print(result["corrected"])
print("\nIssues found:", len(result["issues"]))
# One row per issue: flagged text, first suggestion, and the first 50
# characters of the checker's message.
for issue in result["issues"]:
    print(f"  '{issue['error']}' → {issue['suggestion']} ({issue['message'][:50]})")

Generating a Subject Line

from transformers import pipeline

# Shared summarization pipeline backed by facebook/bart-large-cnn;
# generate_subject_line calls it to produce a short summary.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def generate_subject_line(email_body, max_length=15):
    """Suggest a subject line by summarizing the email body very briefly.

    Args:
        email_body: The email text to summarize.
        max_length: Upper length bound forwarded to the summarizer
            (presumably counted in model tokens, not words — confirm).

    Returns:
        The summary with surrounding whitespace and trailing periods
        removed, or "" when ``email_body`` is empty or only whitespace.
    """
    # Nothing to summarize: avoid sending blank input to the model.
    if not email_body or not email_body.strip():
        return ""

    summary = summarizer(
        email_body,
        max_length=max_length,
        # Keep min_length <= max_length so a caller-supplied max_length
        # below 5 does not produce contradictory length constraints.
        min_length=min(5, max_length),
        do_sample=False
    )
    subject = summary[0]['summary_text']

    # Strip whitespace first: otherwise trailing whitespace shields the
    # final period from rstrip('.') and the subject keeps its full stop.
    return subject.strip().rstrip('.').strip()

# Sample status-update email used as input for subject-line generation.
email_body = """
I wanted to let you know that we've completed the technical review of the new
NLP pipeline and everything looks great. The accuracy metrics have improved by
15% compared to our baseline and the processing speed is three times faster.
We're ready to schedule a demo for the stakeholders next week.
"""

# Generate a subject with the default max_length and print it.
subject = generate_subject_line(email_body)
print(f"Suggested subject: {subject}")

LLM-Powered Email Rewriting

from openai import OpenAI

# Shared API client; reformat_email calls client.chat.completions.create().
# NOTE(review): OpenAI() with no arguments presumably reads the API key from
# the environment (OPENAI_API_KEY) — confirm before running.
client = OpenAI()

def reformat_email(original_email, style="professional"):
    """Ask the chat model to rewrite an email in the requested style.

    Args:
        original_email: The email text to rewrite.
        style: One of "professional", "friendly", "concise" or "diplomatic".
            Any other value falls back to the "professional" instruction.

    Returns:
        The content of the first choice in the chat completion response.
    """
    style_instructions = {
        "professional": "Make it formal, concise, and professional. Use clear structure.",
        "friendly": "Make it warm, approachable, and conversational while staying professional.",
        "concise": "Remove all unnecessary words. Get to the point immediately. Use bullet points if helpful.",
        "diplomatic": "Soften any harsh language. Make critical feedback constructive and respectful.",
    }

    if style in style_instructions:
        instruction = style_instructions[style]
    else:
        # Unknown style: use the default instruction rather than failing.
        instruction = style_instructions["professional"]

    # NOTE(review): every instruction already ends with a period, so the
    # system prompt below contains ".." — harmless, but worth tidying.
    system_prompt = f"You are an email writing assistant. Reformat the email as follows: {instruction}. Preserve the original intent and key information. Return only the reformatted email."
    user_prompt = f"Reformat this email:\n\n{original_email}"

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=400,
        temperature=0.5,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

# Sample email with harsh, accusatory wording, used to demonstrate the
# "diplomatic" style.
harsh_email = """
John,

This is completely unacceptable. The report was supposed to be done yesterday
and I have NO IDEA what you've been doing all week. This is the third time
this has happened. I need this done TODAY or there will be consequences.

Mark
"""

# Print the original, then the model's diplomatic rewrite, for comparison.
print("=== Original ===")
print(harsh_email)
print("\n=== Reformatted (diplomatic) ===")
print(reformat_email(harsh_email, style="diplomatic"))

Email Summarization

from transformers import pipeline

# Summarization pipeline backed by facebook/bart-large-cnn, used by
# summarize_long_email. This repeats the setup from the subject-line section,
# presumably so this snippet can be run on its own.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_long_email(email_text, max_length=100, min_length=30):
    """Summarize the body of an email, ignoring greeting and sign-off.

    The greeting and sign-off are detected by keyword instead of being
    assumed to occupy a fixed number of lines, so emails without a greeting
    or signature keep all of their content.

    Args:
        email_text: The full email text.
        max_length: Upper length bound forwarded to the summarizer.
        min_length: Lower length bound forwarded to the summarizer.

    Returns:
        The body itself (non-blank lines joined with spaces) when it has
        fewer than 30 words, otherwise the summarizer's summary text.
    """
    import re  # local import keeps this snippet self-contained

    greeting_re = re.compile(
        r'^(hi|hello|dear|hey|good\s+(morning|afternoon|evening))\b',
        re.IGNORECASE,
    )
    closing_re = re.compile(
        r'^(best|regards|thanks|thank you|sincerely|cheers|kind regards)\b',
        re.IGNORECASE,
    )
    # A greeting / sign-off line is short ("Hi team,", "Best regards,"); the
    # word cap keeps full sentences that merely start with such a keyword.
    max_marker_words = 4
    # Only look for the sign-off near the end of the email.
    footer_window = 5

    def is_marker(pattern, line):
        return bool(pattern.match(line)) and len(line.split()) <= max_marker_words

    lines = [line.strip() for line in email_text.strip().split('\n')]
    lines = [line for line in lines if line]

    if lines and is_marker(greeting_re, lines[0]):
        lines = lines[1:]

    # Scan the last few lines backwards; drop the sign-off and the signature
    # lines that follow it.
    first_candidate = max(0, len(lines) - footer_window)
    for idx in range(len(lines) - 1, first_candidate - 1, -1):
        if is_marker(closing_re, lines[idx]):
            lines = lines[:idx]
            break

    body = ' '.join(lines)

    if len(body.split()) < 30:
        return body  # Too short to summarize

    summary = summarizer(
        body,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Clip bodies that exceed the model's input limit instead of failing.
        truncation=True,
    )
    return summary[0]['summary_text']

# Sample multi-paragraph project update (greeting, four body paragraphs,
# sign-off and name) that is long enough to take the summarization path.
long_email = """
Hi team,

I hope this message finds you well. I wanted to provide an update on the Q3 NLP platform
project. Over the past two weeks, our engineering team has successfully integrated the
new BERT-based classification model into the production pipeline. The model processes
customer support tickets and automatically categorizes them by urgency and department.

Initial results are very promising. The model achieves 94% accuracy on our test set,
compared to 78% with our previous rule-based system. Processing time is also significantly
faster, handling 500 tickets per minute versus 50 with the old system.

The next phase involves deploying the sentiment analysis component and building the
monitoring dashboard. We expect to complete this by end of Q3 and are on track
with our original timeline.

Please let me know if you have any questions or need more details on the technical implementation.

Best regards,
Jennifer Park
"""

# Summarize with the default length bounds and print the result.
summary = summarize_long_email(long_email)
print("Summary:", summary)