diff --git a/misc/py-kokoro/Makefile b/misc/py-kokoro/Makefile
index 38e460daedaa..0deca2a331f4 100644
--- a/misc/py-kokoro/Makefile
+++ b/misc/py-kokoro/Makefile
@@ -1,39 +1,52 @@
 PORTNAME=	kokoro
 DISTVERSION=	0.9.4
+PORTREVISION=	1
 CATEGORIES=	misc python # machine-learning
 MASTER_SITES=	PYPI
 PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
 
 MAINTAINER=	yuri@FreeBSD.org
 COMMENT=	Text-to-speech inference library for Kokoro-82M model
-WWW=		https://github.com/hexgrad/kokoro
+WWW=		https://github.com/hexgrad/kokoro \
+		https://huggingface.co/hexgrad/Kokoro-82M
 
 LICENSE=	APACHE20
 LICENSE_FILE=	${WRKSRC}/LICENSE
 
 BUILD_DEPENDS=	${PYTHON_PKGNAMEPREFIX}hatchling>0:devel/py-hatchling@${PY_FLAVOR}
 RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}huggingface-hub>0:misc/py-huggingface-hub@${PY_FLAVOR} \
 		${PYTHON_PKGNAMEPREFIX}loguru>0:devel/py-loguru@${PY_FLAVOR} \
 		${PYTHON_PKGNAMEPREFIX}misaki>=0.9.4:misc/py-misaki@${PY_FLAVOR} \
 		${PYTHON_PKGNAMEPREFIX}num2words>0:devel/py-num2words@${PY_FLAVOR} \
 		${PYNUMPY} \
 		${PYTHON_PKGNAMEPREFIX}pytorch>0:misc/py-pytorch@${PY_FLAVOR} \
 		${PYTHON_PKGNAMEPREFIX}spacy>0:textproc/py-spacy@${PY_FLAVOR} \
 		${PYTHON_PKGNAMEPREFIX}transformers>=0:misc/py-transformers@${PY_FLAVOR}
-TEST_DEPENDS=	${PYTHON_PKGNAMEPREFIX}ipython>0:devel/ipython@${PY_FLAVOR} \
+# extra run dependencies for kokoro-text-to-audio and kokoro-text-to-wav
+RUN_DEPENDS+=	${PYTHON_PKGNAMEPREFIX}ipython>0:devel/ipython@${PY_FLAVOR} \
 		${PYTHON_PKGNAMEPREFIX}SoundFile>0:audio/py-SoundFile@${PY_FLAVOR} \
 		mpv:multimedia/mpv
 
 USES=		python
-USE_PYTHON=	autoplist concurrent pep517
+USE_PYTHON=	pep517 concurrent autoplist
 
 NO_ARCH=	yes
 
+PLIST_FILES=	bin/kokoro-text-to-audio \
+		bin/kokoro-text-to-wav
+
 TEST_ENV=	${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}
 
+post-install:
+	${INSTALL_SCRIPT} ${FILESDIR}/kokoro-text-to-audio.py ${STAGEDIR}${PREFIX}/bin/kokoro-text-to-audio
+	${INSTALL_SCRIPT} ${FILESDIR}/kokoro-text-to-wav.py ${STAGEDIR}${PREFIX}/bin/kokoro-text-to-wav
+	@${REINPLACE_CMD} -i '' 's|%%PYTHON%%|${PYTHON_CMD}|' \
+		${STAGEDIR}${PREFIX}/bin/kokoro-text-to-audio \
+		${STAGEDIR}${PREFIX}/bin/kokoro-text-to-wav
+
 do-test:
 	@cd ${WRKSRC} && \
 		${SETENV} ${TEST_ENV} ${PYTHON_CMD} ${FILESDIR}/example.py && \
 		mpv 0.wav
 
 .include <bsd.port.mk>
diff --git a/misc/py-kokoro/files/kokoro-text-to-audio.py b/misc/py-kokoro/files/kokoro-text-to-audio.py
new file mode 100644
--- /dev/null
+++ b/misc/py-kokoro/files/kokoro-text-to-audio.py
@@ -0,0 +1,49 @@
+#!%%PYTHON%%
+"""Speak a text string aloud using the Kokoro text-to-speech pipeline."""
+
+import os
+import subprocess
+import sys
+import tempfile
+
+import numpy as np
+import soundfile as sf
+from kokoro import KPipeline
+
+SAMPLE_RATE = 24000  # sample rate (Hz) passed to soundfile when writing
+
+
+def synthesize(text):
+    """Return all audio generated for *text* as one array, or None if empty."""
+    pipeline = KPipeline(lang_code='a')
+    # The pipeline may yield several (graphemes, phonemes, audio) results for
+    # one input; keep every segment rather than only the last one.
+    chunks = [
+        np.asarray(audio)
+        for _, _, audio in pipeline(text, voice='af_heart')
+        if audio is not None
+    ]
+    return np.concatenate(chunks) if chunks else None
+
+
+def main():
+    """Synthesize the single command-line argument and play it with mpv."""
+    if len(sys.argv) != 2:
+        print("Usage: kokoro-text-to-audio <text>", file=sys.stderr)
+        sys.exit(1)
+
+    audio_data = synthesize(sys.argv[1])
+    if audio_data is None:
+        print("Error: No audio generated", file=sys.stderr)
+        sys.exit(1)
+
+    # TemporaryDirectory picks an unpredictable name (unlike a timestamp under
+    # /tmp) and removes the directory on exit, even when mpv fails.
+    with tempfile.TemporaryDirectory(prefix="kokoro-") as temp_dir:
+        temp_wav = os.path.join(temp_dir, "output.wav")
+        sf.write(temp_wav, audio_data, SAMPLE_RATE)
+        subprocess.run(['mpv', temp_wav], check=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/misc/py-kokoro/files/kokoro-text-to-wav.py b/misc/py-kokoro/files/kokoro-text-to-wav.py
new file mode 100644
--- /dev/null
+++ b/misc/py-kokoro/files/kokoro-text-to-wav.py
@@ -0,0 +1,44 @@
+#!%%PYTHON%%
+"""Write a text string to a WAV file using the Kokoro TTS pipeline."""
+
+import sys
+
+import numpy as np
+import soundfile as sf
+from kokoro import KPipeline
+
+SAMPLE_RATE = 24000  # sample rate (Hz) passed to soundfile when writing
+
+
+def synthesize(text):
+    """Return all audio generated for *text* as one array, or None if empty."""
+    pipeline = KPipeline(lang_code='a')
+    # The pipeline may yield several (graphemes, phonemes, audio) results for
+    # one input; keep every segment rather than only the last one.
+    chunks = [
+        np.asarray(audio)
+        for _, _, audio in pipeline(text, voice='af_heart')
+        if audio is not None
+    ]
+    return np.concatenate(chunks) if chunks else None
+
+
+def main():
+    """Synthesize argv[1] and write the result to the file named by argv[2]."""
+    if len(sys.argv) != 3:
+        print("Usage: kokoro-text-to-wav <text> <output-file>", file=sys.stderr)
+        sys.exit(1)
+
+    text, output_file = sys.argv[1:]
+
+    audio_data = synthesize(text)
+    if audio_data is None:
+        print("Error: No audio generated", file=sys.stderr)
+        sys.exit(1)
+
+    # Written straight to the destination; no scratch directory is needed.
+    sf.write(output_file, audio_data, SAMPLE_RATE)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/misc/py-kokoro/pkg-descr b/misc/py-kokoro/pkg-descr
--- a/misc/py-kokoro/pkg-descr
+++ b/misc/py-kokoro/pkg-descr
@@ -1,8 +1,11 @@
 Kokoro is an open-weight Text-to-Speech (TTS) inference library designed for
 the Kokoro-82M model. It provides fast, small, and free text-to-speech
 capabilities using PyTorch and the Hugging Face ecosystem.
 
 The library supports multiple languages and voices through the Misaki
 grapheme-to-phoneme engine.
 
 For full English language support, install textproc/py-phonemizer separately.
+
+Kokoro is the fastest production-quality English Text-To-Speech converter
+available as of early 2026.