Source code for munin.provider.keywords

#!/usr/bin/env python
# encoding: utf-8

"""
Overview
--------


Reference
---------
"""

import logging
LOGGER = logging.getLogger(__name__)


from munin.provider import Provider
from munin.session import get_cache_path
from munin.rake import extract_keywords

try:
    import plyr
    HAS_PLYR = True
except ImportError:
    HAS_PLYR = False
    LOGGER.debug('No plyr support enabled')


[docs]class KeywordsProvider(Provider): """Splits an arbitary text into a list of keywordsets. Each keywordset contains one more words that are considered to be repeated often or that are distinct in the text. **Takes:** An arbitary text, or a one-element tuple with a string. **Gives:** A list of keywordsets, similar to the WordlistProvider. """ def do_process(self, text): if isinstance(text, tuple): text = text[0] language_code, keywords_map = extract_keywords(text) if language_code is None or not keywords_map: return None return ( language_code, [set(keys) for keys, rating in keywords_map.items() if rating >= 2.0] )
[docs]def check_for_plyr(): """Returns True if the plyr lyrics provider is available""" return HAS_PLYR
[docs]class PlyrLyricsProvider(Provider): """Retrieve a lyrics text from the web using libglyr. .. note:: Many people have `.lyrics` files along their music files. This is *not* checked here, although libglyr is capable of that. **Takes:** A tuple of (artist, title), **outputs** a one-element tuple with the lyrics text in it. **Example Usage:** .. code-block:: python >>> p = PlyrLyricsProvider() >>> p.do_process(('Akrea', 'Trugbild')) ('lots of text', ) In the :term:`Mask` it should be used as: .. code-block:: python PlyrLyricsProvider() | KeywordsProvider() .. warning:: If a artist/title combination is not found the result is remembered. If you do not want this behaviour set the ``cache_failures`` argument to False. :: >>> PlyrLyricsProvider(cache_failures=False) .. note:: When feeding tags from your music database as artist/title it is recommended to use the album_artist and the track_artist as fallback. """ def __init__(self, **kwargs): if not HAS_PLYR: raise LookupError('Plyr could be imported, which is needed for lyrics') self._cache_failures = kwargs.pop('cache_failures', True) self.database = plyr.Database(get_cache_path(None)) Provider.__init__(self, **kwargs) def do_process(self, artist_title): artist, title = artist_title LOGGER.debug('artist={a} title={t} ({v})'.format( a=artist, t=title, v=plyr.version() )) qry = plyr.Query( get_type='lyrics', artist=artist, title=title, database=self.database, parallel=4, timeout=5 ) items = qry.commit() if items: first = items[0] # This item was cached on failure: if first.rating is -1: return None try: return (first.data.decode('utf-8'), ) except UnicodeDecodeError: pass if self._cache_failures: dummy = self.database.make_dummy() self.database.insert(qry, dummy) return None
if __name__ == '__main__': import sys if '--cli' in sys.argv: prov = PlyrLyricsProvider() | KeywordsProvider() for kwset in prov.do_process((sys.argv[2], sys.argv[3])): print(list(kwset))

Related Topics

Useful links:

Package:

Github: