nlp

Some NLP tools.

中文停用词列表

from my_python_module.nlp.chinese_stop_words import STOP_WORDS

nltk_utils

str2tuple

def str2tuple(s, sep="/"):
    """
    Convert the string ``s`` to a tuple, using ``sep`` (default ``"/"``).

    NOTE(review): only the signature is shown here. Judging by the name and
    the ``nltk_utils`` grouping, this presumably mirrors NLTK's ``str2tuple``
    (splitting a tagged token such as ``"fly/NN"`` into a ``(word, tag)``
    pair) -- confirm against the actual implementation.

    :param s: the string to convert
    :param sep: the separator string; defaults to ``"/"``
    """

bigrams

def bigrams(sequence, **kwargs):
    """
    Return the bigrams generated from a sequence of items, as an iterator.
    For example:

        >>> list(bigrams([1,2,3,4,5]))
        [(1, 2), (2, 3), (3, 4), (4, 5)]

    :param sequence: the source items from which the bigrams are built
    :param kwargs: extra keyword arguments; how they are used is not shown
        in this snippet -- TODO confirm against the implementation
    :return: an iterator over 2-tuples of adjacent items, as in the
        example above
    """

FreqDist

统计词频

from collections import defaultdict, Counter
class FreqDist(Counter):
    """
    Frequency distribution used for counting word frequencies.

    Subclasses ``collections.Counter``.
    """
    def __init__(self, samples=None):
        """
        Create a frequency distribution.

        :param samples: optional; defaults to ``None``.
            NOTE(review): presumably the initial items to count -- only the
            signature is shown here, confirm against the implementation.
        """

utils

is_contain_chinese

判断字符串是否含有中文

def is_contain_chinese(check_str):
    """
    Check whether a string contains Chinese characters.

    :param check_str: {str} the string to check
    :return: {bool} True if the string contains Chinese, False otherwise
    """