Source code for chunkipy.text_splitters.basic_text_splitters

from typing import List
from typing_extensions import override
from chunkipy.text_splitters.base_text_splitter import BaseTextSplitter


class SeparatorTextSplitter(BaseTextSplitter):
    """Split text on a fixed separator string, keeping the separator attached.

    The text is split on ``separator``; pieces that are empty or consist of a
    single space are discarded. Every remaining piece has the separator
    appended again, except the last one, which is returned without it.
    """

    def __init__(self, separator: str):
        """Store the separator used for splitting.

        Args:
            separator: Non-empty string on which the text is split.

        Raises:
            ValueError: If ``separator`` is not a string or is empty.
        """
        # Check the type first so that truthiness is only ever evaluated on a
        # real string; arbitrary objects may define a surprising __bool__.
        if not isinstance(separator, str) or not separator:
            raise ValueError("Provide a valid non-empty separator.")
        self._separator = separator

    @property
    def separator(self) -> str:
        """The separator string this splitter was configured with."""
        return self._separator

    @override
    def _split(self, text: str) -> List[str]:
        """Split ``text`` into pieces that keep their trailing separator.

        Args:
            text: The string to split.

        Returns:
            The non-blank pieces in their original order. Each piece ends with
            the separator except the last one. An empty list is returned when
            no piece survives the filtering (for example for an empty string
            or a string made only of separators).
        """
        separator = self.separator
        text_pieces = [
            piece + separator
            for piece in text.split(separator)
            if piece != ' ' and piece != ''
        ]
        # Nothing left after filtering: indexing [-1] below would raise
        # IndexError, so report "no pieces" explicitly instead.
        if not text_pieces:
            return []
        # The separator was appended to every piece; strip it from the last.
        text_pieces[-1] = text_pieces[-1][:-len(separator)]
        return text_pieces
class SemicolonTextSplitter(SeparatorTextSplitter):
    """Separator splitter preconfigured with ``"; "`` (semicolon plus space)."""

    def __init__(self) -> None:
        """Create the splitter; the separator is fixed, so no arguments are taken."""
        super().__init__("; ")
class ColonTextSplitter(SeparatorTextSplitter):
    """Separator splitter preconfigured with ``": "`` (colon plus space)."""

    def __init__(self) -> None:
        """Create the splitter; the separator is fixed, so no arguments are taken."""
        super().__init__(": ")
class CommaTextSplitter(SeparatorTextSplitter):
    """Separator splitter preconfigured with ``", "`` (comma plus space)."""

    def __init__(self) -> None:
        """Create the splitter; the separator is fixed, so no arguments are taken."""
        super().__init__(", ")
class FullStopTextSplitter(SeparatorTextSplitter):
    """Separator splitter preconfigured with ``". "`` (full stop plus space)."""

    def __init__(self) -> None:
        """Create the splitter; the separator is fixed, so no arguments are taken."""
        super().__init__(". ")
class NewlineTextSplitter(SeparatorTextSplitter):
    """Separator splitter preconfigured with a single newline character."""

    def __init__(self) -> None:
        """Create the splitter; the separator is fixed, so no arguments are taken."""
        super().__init__("\n")
class WordTextSplitter(SeparatorTextSplitter):
    """Separator splitter preconfigured with a single space character."""

    def __init__(self) -> None:
        """Create the splitter; the separator is fixed, so no arguments are taken."""
        super().__init__(" ")