Set up data generation pipeline
- Add slurm script for data generation
- Finalize Qwen-2.5-coder-32B model for data generation
Showing
- .gitignore: 1 addition, 0 deletions
- data_generation/main.py: 110 additions, 0 deletions
- data_generation/main_qwen.py: 390 additions, 0 deletions
- data_generation/slurm.sh: 38 additions, 0 deletions
- data_generation/slurm_qwen.sh: 38 additions, 0 deletions
- requirements.txt: 48 additions, 0 deletions
.gitignore
0 → 100644
data_generation/main.py
0 → 100644
data_generation/main_qwen.py
0 → 100644
data_generation/slurm.sh
0 → 100644
data_generation/slurm_qwen.sh
0 → 100644
requirements.txt
0 → 100644
accelerate==1.1.1
aiohappyeyeballs==2.4.3
aiohttp==3.11.8
aiosignal==1.3.1
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.4.0
colorama==0.4.6
datasets==3.1.0
dill==0.3.8
filelock==3.16.1
frozenlist==1.5.0
fsspec==2024.9.0
huggingface-hub==0.26.3
idna==3.10
Jinja2==3.1.3
MarkupSafe==2.1.5
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
networkx==3.2.1
numpy==2.1.3
packaging==24.2
pandas==2.2.3
pillow==10.2.0
propcache==0.2.0
psutil==6.1.0
pyarrow==18.1.0
python-dateutil==2.9.0.post0
pytz==2024.2
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
safetensors==0.4.5
setuptools==70.0.0
six==1.16.0
sympy==1.13.1
tokenizers==0.20.3
torch==2.5.1+cu118
torchaudio==2.5.1+cu118
torchvision==0.20.1+cu118
tqdm==4.67.1
transformers==4.46.3
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
xxhash==3.5.0
yarl==1.18.0
Please register or sign in to comment