PK!thcleanser/__init__.py"""Cleanser""" from cleanser.core.generic import Generic from cleanser.core.reddit import Reddit class Cleanser(Generic, Reddit): """Class does stuff""" def __init__(self, text): super().__init__() self.text = text PK!ճ%%cleanser/core/__init__.py"""Common functions and classes for all core classes.""" class Base: """Base class for all cleansers""" def __init__(self): self._text = None @property def text(self): """Property for text that is being cleansed""" return self._text @text.setter def text(self, value): """Property setter for text that is being cleansed""" self._text = value def __repr__(self): return f"{self.__class__.__name__}({self.text!r})" def __str__(self): return f"{self.text}" PK! Q//cleanser/core/generic.py"""Common generic methods for cleaning text.""" import re from cleanser.core import Base RE_WHITESPACE = re.compile(r"\s+") RE_EMOJI = re.compile(r"[\U00010000-\U0010ffff]", flags=re.UNICODE) URL_REGEX = re.compile( r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+" ) class Generic(Base): """Common generic methods for cleaning text.""" def whitespaces(self): """Removes extra spaces, tabs, and newlines from text.""" self.text = RE_WHITESPACE.sub(" ", self.text).strip() return self def emojis(self): """Removes emojis from text.""" self.text = RE_EMOJI.sub("", self.text) return self def urls(self): """Removes urls from text.""" self.text = URL_REGEX.sub("", self.text) return self PK!Kq//cleanser/core/reddit.py"""Utilities specifically for cleaning text from Reddit.""" import re from cleanser.core import Base REDDIT_MENTIONS = re.compile(r"/?u/\S+") REDDIT_SUBREDDITS = re.compile(r"/?r/\S+") REDDIT_QUOTES = re.compile(r"^\>.*$", flags=re.MULTILINE) REDDIT_BOLD_ITALICS = re.compile(r"\*+") REDDIT_CODE = re.compile(r"\`") REDDIT_SUPERSCRIPT = re.compile(r"\^") REDDIT_HEADERS = re.compile(r"\#+") REDDIT_STRIKETHROUGH = re.compile(r"~{2}.*~{2}") REDDIT_SPOILERS = re.compile(r">!|!<") # REDDIT_LINKS = re.compile(r"\[.*\]\(.*\)") class Reddit(Base): """Common methods for cleaning text from Reddit.""" def reddit_mentions(self): """Removes reddit user mentions from text.""" self.text = REDDIT_MENTIONS.sub("", self.text) return self def reddit_subreddits(self): """Removes reddit subreddit mentions from text.""" self.text = REDDIT_SUBREDDITS.sub("", self.text) return self def reddit_quotes(self): """Removes reddit quotes from text.""" self.text = REDDIT_QUOTES.sub("", self.text) return self def reddit_bold_italics(self): """Removes reddit bolding and italics from text.""" self.text = REDDIT_BOLD_ITALICS.sub("", self.text) return self def reddit_code(self): """Removes reddit code formatting from text.""" self.text = REDDIT_CODE.sub("", self.text) return self def reddit_superscript(self): """Removes reddit superscript formatting from text.""" self.text = REDDIT_SUPERSCRIPT.sub("", self.text) return self def reddit_headers(self): """Removes reddit header formatting from text.""" self.text = REDDIT_HEADERS.sub("", self.text) return self def reddit_strikethrough(self): """Removes reddit strikethrough formatted text.""" self.text = REDDIT_STRIKETHROUGH.sub("", self.text) return self def reddit_spoilers(self): """Removes reddit spoiler formatting from text.""" self.text = REDDIT_SPOILERS.sub("", self.text) return self PK!HڽTUcleanser-0.1.0.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!Hwm!cleanser-0.1.0.dist-info/METADATAJ1yy7V*-J/qQL&C}z=6淧X{Dz,2p0fJ[^|-A~#kczͪ;\&IMHvWvSkuswLR$6I;aDOTÕos닛 ?xC1!sC1sSZVƵPK!H^S cleanser-0.1.0.dist-info/RECORD}Kr0}ϒDaBTDe%CۍN Ȇϔ )ZJ7 F/7gd7g{']]#εUS9^`'XYtwVlUsdT!umh \ZTw^7e@;>Ȳҿdt57:]~}52ؾj%8 |Iz$ɂ d (4DB!s4'dhظUt7sTW?tE6mnǻ$ۤr=luY̐&2r|S,GCb 'PK!thcleanser/__init__.pyPK!ճ%%$cleanser/core/__init__.pyPK! Q//cleanser/core/generic.pyPK!Kq//cleanser/core/reddit.pyPK!HڽTUIcleanser-0.1.0.dist-info/WHEELPK!Hwm!cleanser-0.1.0.dist-info/METADATAPK!H^S cleanser-0.1.0.dist-info/RECORDPKx