mypy is a static type checker for Python that catches type errors before the code runs. By adding type hints to your Python code, you let mypy verify type consistency and flag bugs that would otherwise only surface at runtime.
Key Features
- Static analysis: Catch type errors without running code
- Gradual typing: Add types incrementally to existing code (see the sketch after this list)
- IDE integration: Real-time error checking in editors
- Third-party library support: Type stubs for popular libraries
- Flexible strictness: Configure type checking rigor
- No runtime overhead: Type hints are ignored during execution
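A minimal sketch of gradual typing: an annotated function and an untyped legacy helper can live in the same module, and mypy checks only what is annotated unless stricter options are enabled. The function names here are illustrative, not from a real codebase.

```python
import numpy as np

# Already annotated: mypy checks every call to this function.
def baseline_correct(trace: np.ndarray, baseline_end: int) -> np.ndarray:
    """Subtract the mean of the baseline window from the whole trace."""
    return trace - trace[:baseline_end].mean()

# Legacy, untyped helper: mypy skips its body by default,
# so it can be annotated later without blocking adoption.
def legacy_smoothing(trace, width):
    return np.convolve(trace, np.ones(width) / width, mode="same")
```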
Type Hints Basics
```python
import numpy as np
from typing import List, Tuple, Optional, Dict
from pathlib import Path

# Basic type annotations
def zscore(data: np.ndarray, axis: int = 0) -> np.ndarray:
    """Z-score normalization with type hints."""
    mean = np.mean(data, axis=axis, keepdims=True)
    std = np.std(data, axis=axis, keepdims=True)
    return (data - mean) / std

# Optional parameters
def load_data(filepath: Path, subject_id: Optional[str] = None) -> np.ndarray:
    """Load data with optional subject filtering."""
    data = np.load(filepath)
    if subject_id is not None:
        data = filter_by_subject(data, subject_id)
    return data

# Multiple return types
def get_spike_times(recording: np.ndarray,
                    threshold: float) -> Tuple[np.ndarray, int]:
    """Detect spikes and return times with count."""
    spike_indices = np.where(recording > threshold)[0]
    spike_times = spike_indices / 30000.0  # Convert to seconds
    return spike_times, len(spike_times)

# Dictionary types
def compute_trial_stats(trials: Dict[str, np.ndarray]) -> Dict[str, float]:
    """Compute statistics for each trial condition."""
    return {
        condition: float(np.mean(data))  # cast so the value matches the declared float
        for condition, data in trials.items()
    }
```
Research Code Examples
```python
from dataclasses import dataclass
from typing import Protocol, Union, List, Dict
from pathlib import Path
import numpy as np
import pandas as pd

# Define data structures with types
@dataclass
class Recording:
    """Neural recording with metadata."""
    data: np.ndarray
    sampling_rate: float
    channel_names: List[str]
    subject_id: str
    session_date: str

    def duration_seconds(self) -> float:
        """Calculate recording duration."""
        return len(self.data) / self.sampling_rate

# Protocol for type-safe interfaces
class Filter(Protocol):
    """Protocol for signal filters."""
    def apply(self, signal: np.ndarray, fs: float) -> np.ndarray:
        ...

class ButterworthFilter:
    """Butterworth lowpass filter."""
    def __init__(self, cutoff: float, order: int = 4):
        self.cutoff = cutoff
        self.order = order

    def apply(self, signal: np.ndarray, fs: float) -> np.ndarray:
        from scipy import signal as sp_signal
        nyq = 0.5 * fs
        normal_cutoff = self.cutoff / nyq
        b, a = sp_signal.butter(self.order, normal_cutoff, btype='low')
        return sp_signal.filtfilt(b, a, signal)

def process_recording(rec: Recording,
                      filter_obj: Filter) -> np.ndarray:
    """Process recording with any filter implementation."""
    return filter_obj.apply(rec.data, rec.sampling_rate)

# Union types for multiple possibilities
def load_config(source: Union[Path, str, Dict]) -> Dict:
    """Load configuration from file or dict."""
    if isinstance(source, dict):
        return source
    elif isinstance(source, (Path, str)):
        import json
        with open(source) as f:
            return json.load(f)
    else:
        raise TypeError(f"Unsupported config source: {type(source)}")
```
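Because Filter is a Protocol, any class with a matching apply method is accepted structurally; no inheritance is required. A brief, hypothetical usage sketch:

```python
rec = Recording(
    data=np.random.randn(30000),   # one second of synthetic data
    sampling_rate=30000.0,
    channel_names=["ch0"],
    subject_id="subj01",
    session_date="2024-01-01",
)

butter = ButterworthFilter(cutoff=300.0)
filtered = process_recording(rec, butter)  # mypy accepts ButterworthFilter as a Filter

# Passing an object without a matching apply() method, e.g.
# process_recording(rec, "not a filter"), is rejected at type-check time.
```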
Type Checking NumPy and Pandas
```python
import numpy as np
import pandas as pd
from numpy.typing import NDArray

# Specify array element types
def smooth_signal(data: NDArray[np.float64],
                  window_size: int) -> NDArray[np.float64]:
    """Apply moving average smoothing."""
    kernel = np.ones(window_size) / window_size
    return np.convolve(data, kernel, mode='same')

# DataFrame type hints
def analyze_trials(df: pd.DataFrame) -> pd.DataFrame:
    """Analyze trial data with type checking."""
    required_columns = ['trial_id', 'response_time', 'correct']
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"Missing column: {col}")
    summary = df.groupby('trial_id').agg({
        'response_time': 'mean',
        'correct': 'sum'
    })
    return summary
```
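NumPy ships its own type information, so NDArray annotations work out of the box. For pandas, mypy's coverage is usually improved by installing the separately maintained pandas-stubs package (an optional extra dependency, assuming your environment allows it):

```bash
pip install pandas-stubs
```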
Running mypy
```bash
# Check single file
mypy analysis.py

# Check entire project
mypy src/

# Show error codes
mypy --show-error-codes analysis.py

# Strict mode (recommended for new projects)
mypy --strict analysis.py
```
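For orientation, mypy reports each problem as a file and line number, a message, and an error code in brackets, followed by a summary line. The example below is illustrative; the exact line numbers and wording depend on your code and mypy version:

```text
analysis.py:27: error: Argument 2 to "get_spike_times" has incompatible type "str"; expected "float"  [arg-type]
Found 1 error in 1 file (checked 1 source file)
```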
Configuration
Create mypy.ini or add to pyproject.toml:
```ini
[mypy]
python_version = 3.12
warn_return_any = True
warn_unused_configs = True
disallow_untyped_defs = True

# Per-module settings
[mypy-scipy.*]
ignore_missing_imports = True

[mypy-matplotlib.*]
ignore_missing_imports = True
```
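The same settings can live in pyproject.toml under a [tool.mypy] table, as mentioned above. A sketch of the equivalent configuration (TOML uses quoted strings and lowercase booleans, and per-module settings become override tables):

```toml
[tool.mypy]
python_version = "3.12"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true

# Per-module settings
[[tool.mypy.overrides]]
module = ["scipy.*", "matplotlib.*"]
ignore_missing_imports = true
```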
Benefits for Research Code
Catch errors early:
```python
import numpy as np
from typing import Tuple

def compute_power_spectrum(signal: np.ndarray,
                           fs: float) -> Tuple[np.ndarray, np.ndarray]:
    from scipy import signal as sp_signal
    freqs, psd = sp_signal.welch(signal, fs=fs)
    return freqs, psd

# mypy catches the mismatch before runtime: the function returns a tuple,
# so assigning the result to a plain ndarray variable is flagged.
freqs: np.ndarray = compute_power_spectrum(data, fs=1000)  # error: incompatible assignment
```
Document interfaces:
```python
import numpy as np
from typing import Tuple

# Type hints serve as inline documentation
def align_signals(reference: np.ndarray,
                  target: np.ndarray,
                  max_lag: int = 100) -> Tuple[np.ndarray, int]:
    """
    Align two signals using cross-correlation.

    Returns:
        aligned_target: Target signal shifted to align with reference
        lag: Number of samples target was shifted
    """
    ...
```
IDE autocompletion:
- Type hints enable better code suggestions
- Catch typos in attribute/method names
- Show function signatures
When to Use mypy
Best for:
- Analysis pipelines with multiple stages
- Shared code libraries
- Large projects with multiple contributors
- Code that processes different data types
Start with:
- Add types to function signatures
- Use `# type: ignore` for tricky cases (example below)
- Gradually increase strictness
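A minimal sketch of a targeted suppression; the module name is made up, and in recent mypy releases you can narrow the ignore to a specific bracketed error code (as reported by --show-error-codes), e.g. `# type: ignore[import-untyped]`:

```python
# Hypothetical in-house module that ships without type information
import burst_detection_toolbox  # type: ignore

bursts = burst_detection_toolbox.detect(spike_train, threshold=3.0)
```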
Common Research Patterns
```python
import numpy as np
import pandas as pd
from typing import TypeVar, Callable, List, Literal, Union, Tuple

# Generic types for flexible functions
T = TypeVar('T', np.ndarray, pd.DataFrame)

def apply_to_trials(trials: List[T],
                    func: Callable[[T], T]) -> List[T]:
    """Apply function to each trial."""
    return [func(trial) for trial in trials]

# Literal types for specific values
def filter_signal(signal: np.ndarray,
                  filter_type: Literal['lowpass', 'highpass', 'bandpass'],
                  cutoff: Union[float, Tuple[float, float]]) -> np.ndarray:
    """Filter with validated filter type."""
    ...
```
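A brief, hypothetical usage sketch of what these patterns buy you: the TypeVar preserves the element type across the call, and the Literal catches misspelled filter names before the code runs.

```python
trials: List[np.ndarray] = [np.random.randn(1000) for _ in range(10)]

# Inferred as List[np.ndarray]; a list of DataFrames would infer List[pd.DataFrame] instead.
detrended = apply_to_trials(trials, lambda t: t - t.mean())

# mypy rejects a typo in the filter name at check time:
# filter_signal(trials[0], filter_type='lowpss', cutoff=300.0)  # error: invalid Literal value
```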
Integration with Development Tools
- Pre-commit hooks: Run mypy before commits (sample hook config at the end of this section)
- CI/CD: Include type checking in pipelines
- VS Code: Real-time type checking with Pylance
- pytest: Use the pytest-mypy plugin to run type checks as part of the test suite
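A minimal pre-commit hook sketch using the community mirrors-mypy repository; the pinned rev is a placeholder to replace with a current release, and additional_dependencies is only needed so mypy can see the types of your runtime dependencies.

```yaml
# .pre-commit-config.yaml
repos:
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.11.0   # placeholder: pin to a current mypy release
    hooks:
      - id: mypy
        additional_dependencies: [numpy, pandas-stubs]
```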