# Radix/examples/modernbert/activations/activations.py

import logging
import torch
from transformers import pipeline
from tools.zml_utils import ActivationCollector

# Script-wide logging setup: INFO level, records prefixed with a timestamp
# and the level name.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)

# Model identifier handed to the transformers pipeline; its last path
# component is also used to name the saved activations file.
MODEL_NAME: str = "answerdotai/ModernBERT-base"


def main() -> None:
    """Collect activations for one fill-mask inference and save them to disk.

    Builds a ``fill-mask`` pipeline for ``MODEL_NAME`` (on CUDA when
    available, otherwise on CPU), wraps it in an ``ActivationCollector``,
    runs a single masked sentence through it, and writes the returned
    activations with ``torch.save`` to ``<model basename>.activations.pt``
    relative to the current working directory.

    Raises:
        Exception: any error raised along the way is logged with its
            traceback and then re-raised unchanged.
    """
    try:
        log.info("Start running main()")

        log.info(f"CPU capability : `{torch.backends.cpu.get_cpu_capability()}`")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        log.info(f"Loading model : `{MODEL_NAME}`")

        fill_mask_pipeline = pipeline(
            "fill-mask",
            model=MODEL_NAME,
            device_map=device,
        )
        model, tokenizer = fill_mask_pipeline.model, fill_mask_pipeline.tokenizer
        log.info(
            f"Model loaded successfully {model.config.architectures} - "
            f"`{model.config.torch_dtype}` - "
            f"{tokenizer.model_max_length} max tokens"
        )

        # Wrap the pipeline, and extract activations.
        # Activations files can be huge for big models,
        # so let's stop collecting after 1000 layers.
        # NOTE(review): `stop_after_first_step=True` presumably limits
        # collection to the first forward step — confirm in ActivationCollector.
        zml_pipeline = ActivationCollector(
            fill_mask_pipeline, max_layers=1000, stop_after_first_step=True
        )

        input_text = "Paris is the [MASK] of France."
        outputs, activations = zml_pipeline(input_text)
        log.info(f"outputs : {outputs}")

        # Name the file after the last path component of the model id,
        # e.g. "org/Model" -> "Model.activations.pt".
        filename = MODEL_NAME.split("/")[-1] + ".activations.pt"
        torch.save(activations, filename)
        log.info(f"Saved {len(activations)} activations to {filename}")

        log.info("End running main()")
    except Exception as exception:
        # log.exception records the traceback alongside the message, which
        # log.error(exception) would drop from the log output.
        log.exception("main() failed: %s", exception)
        raise


# Run the activation dump only when this file is executed as a script,
# so importing the module does not trigger a model load.
if __name__ == "__main__":
    main()