ml-explore/mlx-lm test_data on GitHub

Commands (assuming the script below is saved as dl_test_data.py):

HF_HOME="." python dl_test_data.py
zip -r test_data.zip datasets hub
gh release upload test_data test_data.zip

import datasets
from huggingface_hub import snapshot_download

# Hugging Face Hub repo ids. Each one is passed to `snapshot_download`
# together with the `allow_patterns` filter in the loop further down.
repos = [
    "mlx-community/Qwen1.5-0.5B-Chat-4bit",
    "mlx-community/Mistral-7B-v0.2-4bit",
    "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx",
    "mlx-community/Mistral-7B-Instruct-v0.3",
    "mlx-community/Phi-3.5-mini-instruct-4bit",
    "mlx-community/Llama-3.2-1B-Instruct-4bit",
    "mlx-community/Falcon3-7B-Instruct-4bit",
    "mlx-community/Qwen3-4B-4bit",
]

# Glob patterns selecting which files are fetched from every repo in `repos`.
# None of these match `*.safetensors`; weight files are requested separately.
allow_patterns = [
    "*.md",
    "*.json",
    "*.py",
    "tokenizer.model",
    "*.tiktoken",
    "tiktoken.model",
    "*.txt",
    "*.jsonl",
    "*.jinja",
]

# Step 1: for every listed repo, download only the files that match
# `allow_patterns`.
for repo in repos:
    snapshot_download(repo, allow_patterns=allow_patterns)

# Step 2: one repo additionally gets its `model*.safetensors` files.
snapshot_download(
    "mlx-community/Qwen1.5-0.5B-Chat-4bit",
    allow_patterns=["model*.safetensors"],
)

# Step 3: load the "billsum" dataset; the return value is discarded.
# NOTE(review): presumably run only to populate the local `datasets` cache
# directory that gets zipped afterwards -- confirm.
datasets.load_dataset("billsum")