56 lines
1.7 KiB
Python
56 lines
1.7 KiB
Python
import logging
|
|
import torch
|
|
from transformers import pipeline
|
|
from tools.zml_utils import ActivationCollector
|
|
|
|
# Configure the root logger once at import time: INFO level, each record
# rendered as "<timestamp> [<LEVEL>] <message>".
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
)
# Module-level logger used throughout this script.
log = logging.getLogger(__name__)
|
|
|
|
# Model identifier handed to the `transformers` fill-mask pipeline; its last
# "/"-separated component also names the saved activations file.
MODEL_NAME: str = "answerdotai/ModernBERT-base"
|
|
|
|
|
|
def main() -> None:
    """Run the fill-mask pipeline once and save the collected activations.

    Loads the ``fill-mask`` pipeline for ``MODEL_NAME`` on CUDA when it is
    available (CPU otherwise), wraps it in ``ActivationCollector``, feeds a
    single masked sentence through the wrapper, and writes the returned
    activations with ``torch.save`` to ``<model-basename>.activations.pt``
    (a relative path, so it lands in the current working directory).

    Raises:
        Exception: any failure is logged together with its traceback and
            then re-raised unchanged.
    """
    try:
        log.info("Start running main()")

        log.info(f"CPU capability : `{torch.backends.cpu.get_cpu_capability()}`")
        # Prefer the GPU when one is visible to torch; fall back to CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        log.info(f"Loading model : `{MODEL_NAME}`")

        fill_mask_pipeline = pipeline(
            "fill-mask",
            model=MODEL_NAME,
            device_map=device,
        )
        model, tokenizer = fill_mask_pipeline.model, fill_mask_pipeline.tokenizer
        log.info(
            f"Model loaded successfully {model.config.architectures} - `{model.config.torch_dtype}` - {tokenizer.model_max_length} max tokens"  # noqa: E501
        )

        # Wrap the pipeline, and extract activations.
        # Activations files can be huge for big models,
        # so let's stop collecting after 1000 layers.
        zml_pipeline = ActivationCollector(
            fill_mask_pipeline, max_layers=1000, stop_after_first_step=True
        )

        input_text = "Paris is the [MASK] of France."
        outputs, activations = zml_pipeline(input_text)
        log.info(f"ouputs : {outputs}")

        # Name the file after the last path component of the model id.
        filename = MODEL_NAME.split("/")[-1] + ".activations.pt"
        torch.save(activations, filename)
        log.info(f"Saved {len(activations)} activations to {filename}")

        log.info("End running main()")
    except Exception as exception:
        # log.exception keeps the same message as before but also attaches the
        # traceback to the log record, so redirected logs show where it failed.
        log.exception(exception)
        raise
|
|
|
|
|
|
# Run the activation collection only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|