Commands (assuming the script below is saved as dl_test_data.py):
HF_HOME="." python dl_test_data.py
zip -r test_data.zip datasets hub
gh release upload test_data test_data.zip
import datasets
from huggingface_hub import snapshot_download
# Hugging Face Hub repositories that get a filtered snapshot.
repos = [
    "mlx-community/Qwen1.5-0.5B-Chat-4bit",
    "mlx-community/Mistral-7B-v0.2-4bit",
    "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx",
    "mlx-community/Mistral-7B-Instruct-v0.3",
    "mlx-community/Phi-3.5-mini-instruct-4bit",
    "mlx-community/Llama-3.2-1B-Instruct-4bit",
    "mlx-community/Falcon3-7B-Instruct-4bit",
    "mlx-community/Qwen3-4B-4bit",
]

# Glob patterns passed as the file filter for every repository in `repos`.
allow_patterns = [
    "*.md",
    "*.json",
    "*.py",
    "tokenizer.model",
    "*.tiktoken",
    "tiktoken.model",
    "*.txt",
    "*.jsonl",
    "*.jinja",
]

# First pass: one filtered snapshot per repository.
for repo_id in repos:
    snapshot_download(repo_id, allow_patterns=allow_patterns)

# Second pass: a single repository additionally gets the files matching
# "model*.safetensors" (presumably its weights -- confirm against the repo).
snapshot_download(
    "mlx-community/Qwen1.5-0.5B-Chat-4bit",
    allow_patterns=["model*.safetensors"],
)

# Finally, load the "billsum" dataset through the `datasets` library.
datasets.load_dataset("billsum")