
```python
import re
from collections import defaultdict
# 分割句子和字
def split_words(text: str) -> list:
    """Split *text* on whitespace and return the lower-cased tokens.

    Args:
        text: The string to tokenize. Runs of whitespace count as a single
            separator, and leading/trailing whitespace is ignored.

    Returns:
        A list of lower-cased tokens; an empty list when *text* is empty or
        contains only whitespace.
    """
    # Lower-casing never adds or removes whitespace, so lowering once up
    # front yields the same tokens as lowering each one after the split.
    return text.lower().split()
# 提取特定字眼(如文言词)
def extract文言词(text: str, pattern: str) -> list:
    """Return every non-overlapping match of *pattern* found in *text*.

    Args:
        text: The string to search. It is searched as given (no case
            folding is applied).
        pattern: A regular expression accepted by ``re.findall``.

    Returns:
        The matches in left-to-right order. As with ``re.findall``, items
        are strings when the pattern has at most one capture group and
        tuples when it has several. Empty list when nothing matches.
    """
    # re.findall already builds a fresh list, so no element-wise copy is
    # needed; the previously computed (and unused) word split is dropped.
    return re.findall(pattern, text)
# 判断文言词
def is_wrod(word: str) -> bool:
    """Return True when *word* is one of the listed classical particles.

    The recognized particles are "有", "之", "而", "于" and "为".

    NOTE(review): the previous body could not run -- it indexed a string
    with a string key and referenced an undefined name (``translating``).
    A plain membership test is the assumed intent; confirm with the author.

    Args:
        word: The token to check. It is lower-cased before the lookup.

    Returns:
        True if the lower-cased token is exactly one of the particles,
        False otherwise (including for the empty string).
    """
    # The original table mapped each particle to itself, so only its keys
    # carried information; a set expresses that directly.
    particles = {"有", "之", "而", "于", "为"}
    return word.lower() in particles
# 分析诗句特征
def analyze_pos(text: str) -> dict:
    """Return rounded per-word averages of the positive and negative tallies.

    Every whitespace-separated token adds +1 to the '正面' tally and -1 to
    the '负面' tally; each tally is then divided by the token count and
    rounded to the nearest integer.

    NOTE(review): no sentiment lexicon is consulted, so any non-empty text
    yields 1 and -1. The previous body overwrote the tallies instead of
    accumulating them, called ``sum()`` on an int, and returned an
    undefined name; accumulation is the assumed intent -- confirm.

    Args:
        text: The verse or sentence to analyze.

    Returns:
        A dict with the keys '正面平均' and '负面平均'. Both values are 0 when
        *text* contains no tokens.
    """
    words = text.split()
    if not words:
        # Guard against dividing by zero on empty or whitespace-only input.
        return {'正面平均': 0, '负面平均': 0}

    pos_counts = defaultdict(int)
    for _ in words:
        pos_counts['正面'] += 1
        pos_counts['负面'] -= 1

    avg_positive = pos_counts['正面'] / len(words)
    avg_negative = pos_counts['负面'] / len(words)
    return {'正面平均': round(avg_positive), '负面平均': round(avg_negative)}
# 提取词汇和短语
def extract_terms(text, word_or phrase): # 分别提取单词和分词(包括逗号) words = re.findall(word_or phrase, text) terms = [word.lower() for word in words if len(word)
推荐阅读
查看更多相似文章
