python-archieve-projects/1.26 自动化测试/hanlp_tokenizier.py

99 lines
3.1 KiB
Python
Executable File

from pyhanlp import *
import utils
def open_file(tokens, encoding="utf-8-sig"):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        tokens: Path of the text file to read (despite the name, this is a
            file path that is passed straight to ``open``).
        encoding: Text encoding used to decode the file. The default
            ``"utf-8-sig"`` decodes plain UTF-8 exactly like ``"utf-8"`` and
            additionally drops a leading byte-order mark, which
            ``str.strip()`` would otherwise leave in the first sentence.

    Returns:
        A list with one stripped string per line, in file order. Blank lines
        are kept as empty strings.
    """
    with open(tokens, "r", encoding=encoding) as f:
        # Iterate the file object directly rather than materialising every
        # raw line first with readlines().
        return [line.strip() for line in f]
def _collect_condition(terms):
    """Fold the segmented terms of one sentence into a 3-slot condition.

    Args:
        terms: Indexable sequence of segmentation results; each item must
            expose ``.word`` and ``.nature``.

    Returns:
        ``[subjects, control_text, remainder]`` where ``subjects`` is a list
        of words, ``control_text`` is the latest control word plus any verbs
        that followed it, and ``remainder`` is the concatenation of the other
        words that came after the control word.
    """
    subjects = []
    control_text = ""
    remainder = ""
    has_alternatives = False
    # Index of the most recent control word; None until one has been seen.
    # (A numeric sentinel would misbehave on sentences longer than itself.)
    control = None
    last = len(terms) - 1

    # Index-based iteration is intentional: the conjunction case needs the
    # neighbouring terms.
    for i in range(len(terms)):
        word = str(terms[i].word)
        nature = str(terms[i].nature)  # presumably a HanLP part-of-speech tag

        # NOTE(review): an empty string is contained in every string, so this
        # test is true for every term tagged 'c' and the 'c' entry in the
        # tuple below is never reached. A specific character was presumably
        # intended here -- TODO confirm against the original author's file.
        if "" in word and nature == 'c':
            # Only look at neighbours that exist: a conjunction in first or
            # last position must not wrap around or run off the end.
            neighbours = []
            if i > 0:
                neighbours.append(str(terms[i - 1].word))
            if i < last:
                neighbours.append(str(terms[i + 1].word))
            for neighbour in neighbours:
                if neighbour not in subjects:
                    subjects.append(neighbour)
            has_alternatives = True
        elif nature in ('p', 'c', 'd'):
            # A control word restarts the second slot and marks the position
            # after which plain words go to the third slot.
            control_text = word
            control = i
        elif nature == 'v' and control_text != "":
            control_text += word
        elif control is not None and i > control:
            remainder += word
        else:
            subjects.append(word)

    # De-duplicate while keeping first-seen order; going through set() would
    # scramble the order and therefore the joined text below.
    subjects = list(dict.fromkeys(subjects))
    if not has_alternatives:
        # Without a conjunction the collected words form a single subject.
        subjects = ["".join(subjects)]
    return [subjects, control_text, remainder]


def tokenizier(sentences):
    """Segment each sentence with HanLP and turn it into a condition triple.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list with one ``[subjects, control_text, remainder]`` entry per
        input sentence, in input order (see ``_collect_condition``).
    """
    use_cases = []
    for sentence in sentences:
        # terms: result of segment
        terms = HanLP.segment(sentence)
        use_cases.append(_collect_condition(terms))
    return use_cases
def generateConditions(useCases, output_path="allConditions.txt"):
    """Flatten condition triples into text lines and write them to a file.

    For an entry whose first slot holds more than one word, one line is
    produced per word (word + second slot + third slot). Otherwise a single
    line is produced by joining the first slot with all remaining slots.

    Args:
        useCases: Iterable of ``[subjects, text, text]`` entries where
            ``subjects`` is a list of strings.
        output_path: File the lines are written to (UTF-8, one per line,
            overwritten on every call). Defaults to ``"allConditions.txt"``
            in the current working directory.

    Returns:
        The list of generated lines, without trailing newlines.
    """
    allConditions = []
    for case in useCases:
        subjects = case[0]
        if len(subjects) > 1:
            suffix = case[1] + case[2]
            allConditions.extend(subject + suffix for subject in subjects)
        else:
            allConditions.append("".join(subjects) + "".join(case[1:]))

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in allConditions)
    return allConditions
def generateUseCases(filename, output_dir="./generated/"):
    """Generate one use-case file per "<attribute>不为" condition line.

    Each line of ``filename`` is inspected. Only lines that contain the
    marker ``"不为"`` with nothing after it are handled: a copy of
    ``utils.case_template`` gets ``template[attribute] = "test"`` and its
    string form is printed and written to ``<output_dir>/<line>.txt``.
    All other lines are ignored.

    Args:
        filename: Path of the UTF-8 conditions file (one condition per line).
        output_dir: Directory the generated files are written to; created on
            demand. Defaults to ``"./generated/"``.

    Returns:
        None.
    """
    import copy
    import os

    marker = "不为"

    # Read with the same encoding the conditions file is written with instead
    # of relying on the platform default.
    with open(filename, "r", encoding="utf-8") as f:
        cases = [line.rstrip("\n") for line in f]

    for case in cases:
        # Split around the first marker: the text before it is the attribute
        # name, the text after it is the expected exception condition.
        attribute_name, found, except_condition = case.partition(marker)
        if not found or except_condition != '':
            continue

        # Work on a private copy so entries added for one case neither leak
        # into the next case nor mutate the shared template.
        # NOTE(review): assumes utils.case_template is a copyable mapping-like
        # object that supports item assignment -- confirm against utils.
        single_case = copy.deepcopy(utils.case_template)
        single_case[attribute_name] = "test"
        rendered = str(single_case)
        print(rendered)

        os.makedirs(output_dir, exist_ok=True)
        target = os.path.join(output_dir, case + ".txt")
        with open(target, 'w', encoding='utf-8') as file_write:
            file_write.write(rendered)
if __name__ == '__main__':
    # Script pipeline: read the input sentences, tokenize them into
    # condition triples, write the flattened conditions to disk, then
    # generate use-case files from that conditions file.
    generateConditions(tokenizier(open_file("main_info.txt")))
    generateUseCases("allConditions.txt")