"""Split requirement sentences into conditions and generate use-case files.

Pipeline (see the ``__main__`` guard at the bottom):

1. ``open_file`` reads one sentence per line.
2. ``tokenizier`` segments every sentence with HanLP and groups the words
   into ``[subjects, connective, remainder]``.
3. ``generateConditions`` expands those groups into flat condition strings
   and writes them to ``allConditions.txt``.
4. ``generateUseCases`` reads the condition strings back and writes one
   file per "<attribute>不为空" condition into ``./generated/``.
"""
import copy
import os

from pyhanlp import *

import utils


def open_file(tokens):
    """Return the lines of the UTF-8 text file *tokens*, stripped.

    Args:
        tokens: Path of the file to read.

    Returns:
        A list with one entry per line, with leading and trailing
        whitespace removed. Blank lines are kept as empty strings.
    """
    with open(tokens, "r", encoding="utf-8") as f:
        return [line.strip() for line in f]


def tokenizier(sentences):
    """Segment each sentence and group its words into a condition triple.

    Every sentence is segmented with ``HanLP.segment`` and the resulting
    terms are classified by their part-of-speech tag (``term.nature``):

    * a ``c`` term containing "或" marks an alternative: its left and right
      neighbours are collected as separate subjects;
    * a ``p``, ``c`` or ``d`` term becomes the connective and marks the
      control position;
    * a ``v`` term seen once a connective exists is appended to it;
    * any other term after the control position is appended to the
      remainder;
    * everything else is collected as a subject word.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list with one ``[subjects, connective, remainder]`` entry per
        sentence. ``subjects`` is a list of strings: the individual
        alternatives when an "或" conjunction was found, otherwise a single
        string made of all subject words joined together.
    """
    use_cases = []
    for sentence in sentences:
        subjects = []
        connective = ""
        remainder = ""
        has_or = False
        # Index of the most recent p/c/d term; None until one is seen, so a
        # very long sentence can never exceed a numeric sentinel by accident.
        control = None

        terms = list(HanLP.segment(sentence))
        for i, term in enumerate(terms):
            word = str(term.word)
            nature = str(term.nature)

            if "或" in word and nature == 'c':
                # Collect the words on both sides of "或". The bounds checks
                # keep a leading "或" from wrapping around to the last term
                # and a trailing "或" from indexing past the end.
                neighbours = []
                if i > 0:
                    neighbours.append(str(terms[i - 1].word))
                if i + 1 < len(terms):
                    neighbours.append(str(terms[i + 1].word))
                for neighbour in neighbours:
                    if neighbour not in subjects:
                        subjects.append(neighbour)
                has_or = True
            elif nature in ('p', 'c', 'd'):
                connective = word
                control = i
            elif nature == 'v' and connective != "":
                connective += word
            elif control is not None and i > control:
                remainder += word
            else:
                subjects.append(word)

        # De-duplicate while keeping first-seen order: the neighbours of
        # "或" are visited again by the loop, and the words are joined
        # below, so their order must stay stable.
        subjects = list(dict.fromkeys(subjects))
        if not has_or:
            subjects = ["".join(subjects)]
        use_cases.append([subjects, connective, remainder])
    return use_cases


def generateConditions(useCases):
    """Expand condition triples into strings and write them to a file.

    For a triple with more than one subject, one line
    ``subject + connective + remainder`` is produced per subject; otherwise
    a single line is produced from the joined subject(s). The lines are
    written to ``allConditions.txt`` (UTF-8) in the current directory.

    Args:
        useCases: List of ``[subjects, connective, remainder]`` entries as
            returned by ``tokenizier``.
    """
    allConditions = []
    for case in useCases:
        subjects = case[0] if len(case[0]) > 1 else ["".join(case[0])]
        tail = "".join(case[1:])
        allConditions.extend(subject + tail for subject in subjects)

    with open("allConditions.txt", "w", encoding="utf-8") as f:
        for line in allConditions:
            f.write(line)
            f.write('\n')


def generateUseCases(filename):
    """Write a use-case file for every "<attribute>不为空" condition.

    Each line of *filename* is treated as one condition. For a line of the
    form ``<attribute>不为空`` a fresh copy of ``utils.case_template`` gets
    ``<attribute>`` set to ``"test"``; its string form is printed and
    written to ``./generated/<condition>.txt``. Other lines are ignored.

    Args:
        filename: Path of the UTF-8 conditions file, one condition per line.
    """
    # The conditions file is written as UTF-8 by generateConditions, so it
    # has to be read back with the same encoding.
    with open(filename, "r", encoding="utf-8") as f:
        cases = [line.rstrip("\n") for line in f]

    output_dir = "./generated/"
    for case in cases:
        if "不为" not in case:
            continue
        # Split on the matched "不为" itself, not on the first "不", which
        # may occur earlier inside the attribute name.
        attribute_name, _, except_condition = case.partition("不为")
        if except_condition != '空':
            continue

        # Work on a copy so one case's attribute does not leak into the
        # shared template and into every later case.
        single_case = copy.deepcopy(utils.case_template)
        single_case[attribute_name] = "test"

        os.makedirs(output_dir, exist_ok=True)
        with open(output_dir + case + ".txt", 'w', encoding='utf-8') as file_write:
            print(str(single_case))
            file_write.write(str(single_case))


if __name__ == '__main__':
    sentences = open_file("main_info.txt")
    useCases = tokenizier(sentences)
    generateConditions(useCases)
    generateUseCases("allConditions.txt")