mirror of
https://github.com/yv1ing/MollyAudit.git
synced 2025-09-16 14:55:50 +08:00
Add graphical interface
This commit is contained in:
@@ -13,33 +13,41 @@ from langchain.retrievers import ContextualCompressionRetriever
|
||||
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
|
||||
from langchain_text_splitters import CharacterTextSplitter
|
||||
|
||||
from audit.rules import FROTIFY_RULES
|
||||
from logger import Logger
|
||||
from audit import callback
|
||||
from audit.prompt import SYSTEM_PROMPT
|
||||
from audit.language import LANGUAGE
|
||||
|
||||
reasoning_model = 'gpt-4o'
|
||||
embedding_model = 'text-embedding-3-large'
|
||||
|
||||
xml_pattern = r'<root>.*?</root>'
|
||||
|
||||
|
||||
class Audit:
|
||||
def __init__(self, fortify_rules):
|
||||
def __init__(self, base_url, api_key, reasoning_model, embedding_model, process_output_callback, result_output_callback):
|
||||
self.raw_chain = None
|
||||
self.source_files_list = []
|
||||
self.max_token = 4096
|
||||
self.fortify_rules = fortify_rules
|
||||
self.reasoning_model = reasoning_model
|
||||
self.embedding_model = embedding_model
|
||||
self.fortify_rules = FROTIFY_RULES
|
||||
self.process_output_callback = process_output_callback
|
||||
self.result_output_callback = result_output_callback
|
||||
self.chat_history = ChatMessageHistory()
|
||||
self.session_id = uuid.uuid4().hex
|
||||
self.response_callback = callback.CustomCallbackHandler()
|
||||
self.embedding = OpenAIEmbeddings(model=embedding_model)
|
||||
self.embedding = OpenAIEmbeddings(
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
model=embedding_model
|
||||
)
|
||||
self.llm = ChatOpenAI(
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
model=reasoning_model,
|
||||
streaming=True,
|
||||
callbacks=[self.response_callback]
|
||||
)
|
||||
self.log = Logger('audit')
|
||||
self.log = Logger('audit', callback=self.process_output_callback)
|
||||
self.splitter = CharacterTextSplitter(
|
||||
chunk_size=300,
|
||||
chunk_overlap=0,
|
||||
@@ -65,12 +73,28 @@ class Audit:
|
||||
('human', '{input}'),
|
||||
])
|
||||
|
||||
def audit(self, callback_function):
|
||||
self.log.info('Start auditing')
|
||||
def audit(self, event):
|
||||
if len(self.source_files_list) <= 0:
|
||||
self.log.error('没有找到源代码文件')
|
||||
return
|
||||
|
||||
self.log.info('开始代码审计流程')
|
||||
self.log.info(f'当前推理模型:{self.reasoning_model}')
|
||||
self.log.info(f'当前嵌入模型:{self.embedding_model}')
|
||||
|
||||
input_content = ''
|
||||
while True:
|
||||
result = self.send_message(input_content)
|
||||
if event.is_set():
|
||||
return
|
||||
|
||||
try:
|
||||
result = self.send_message(input_content)
|
||||
except Exception as e:
|
||||
self.log.error(e)
|
||||
return
|
||||
|
||||
if event.is_set():
|
||||
return
|
||||
|
||||
if xml_match := re.search(xml_pattern, result, re.DOTALL):
|
||||
try:
|
||||
@@ -80,33 +104,36 @@ class Audit:
|
||||
action = root.find('action').text
|
||||
content = root.find('content').text
|
||||
except Exception as e:
|
||||
self.log.error(f'Illegal output, try to correct')
|
||||
print(result)
|
||||
print(e)
|
||||
self.log.error(f'动作指令不合法,尝试纠正')
|
||||
input_content = 'ILLEGAL OUTPUT'
|
||||
continue
|
||||
|
||||
if action == 'QUERY STRUCTURE':
|
||||
self.log.info('Request project structure')
|
||||
self.log.info('请求查询项目结构')
|
||||
input_content = '\n'.join(x for x in self.source_files_list)
|
||||
continue
|
||||
elif action == 'QUERY SOURCE':
|
||||
self.log.info(f'Request source code: {content}')
|
||||
self.log.info(f'请求查询源代码:{content}')
|
||||
input_content = open(content, 'r', encoding='utf-8').read()
|
||||
continue
|
||||
elif action == 'QUERY FORTIFY':
|
||||
self.log.info(f'Request fortify: {content}')
|
||||
self.log.info(f'请求查询规则库:{content}')
|
||||
input_content = '\n'.join(x for x in self.fortify_rules if x == content)
|
||||
continue
|
||||
elif action == 'OUTPUT RESULT':
|
||||
self.log.warning(f'Audit result: \n\n{content}')
|
||||
self.log.warning('输出代码审计结果')
|
||||
self.result_output_callback(content)
|
||||
self.store_messages_in_faiss(content)
|
||||
callback_function(content) # Callback function, used to obtain results externally
|
||||
input_content = ''
|
||||
input_content = 'ok'
|
||||
continue
|
||||
elif action == 'FINISH TASK':
|
||||
self.log.info(content)
|
||||
self.log.info('代码审计任务已完成')
|
||||
return
|
||||
else:
|
||||
self.log.critical(f'Unknown action! {action}')
|
||||
break
|
||||
self.log.error(f'动作指令未定义:{action}')
|
||||
return
|
||||
|
||||
def send_message(self, input_content):
|
||||
self.response_callback.temp_content = ''
|
||||
@@ -140,20 +167,18 @@ class Audit:
|
||||
text_embedding = self.embedding.embed_query(message)
|
||||
doc_id = str(uuid.uuid4())
|
||||
self.messages_db.add_embeddings([(doc_id, text_embedding)], metadatas=[{"id": doc_id}])
|
||||
self.log.info(f"Audit result stored in messages_db with ID: {doc_id}")
|
||||
self.log.info(f"代码审计结果已缓存,文档编号:{doc_id}")
|
||||
|
||||
def load_source_files(self, path, lang):
    """Collect the source files under ``path`` that match language ``lang``.

    The suffixes for ``lang`` are looked up in ``LANGUAGE``. The directory
    tree rooted at ``path`` is then walked, and every file whose name ends
    with one of those suffixes is appended to ``self.source_files_list``.
    Backslashes in the joined path are replaced with forward slashes.

    Args:
        path: Root directory to walk.
        lang: Key into ``LANGUAGE``; the mapped value is presumably an
            iterable of suffix strings (verify against ``audit.language``).

    Returns:
        None. Matches are appended to ``self.source_files_list``, so calling
        this again adds to the existing list instead of replacing it. When
        ``lang`` is not in ``LANGUAGE`` an error is logged and the list is
        left untouched.
    """
    # Guard clause: an unsupported language is reported and nothing is loaded.
    if lang not in LANGUAGE:
        self.log.error('不支持的编程语言')
        return

    suffixes = LANGUAGE[lang]
    for root, _, files in os.walk(path):
        self.source_files_list.extend(
            # Normalise Windows separators so stored paths use '/' only.
            os.path.join(root, file).replace('\\', '/')
            for file in files
            if any(file.endswith(suffix) for suffix in suffixes)
        )

    self.log.info(f'源代码文件加载完成,共:{len(self.source_files_list)} 个')
|
||||
|
||||
117
audit/prompt.py
117
audit/prompt.py
@@ -1,59 +1,76 @@
|
||||
SYSTEM_PROMPT = """
|
||||
You are an intelligent code auditor. I will provide you with a source code. Please strictly follow the following requirements to conduct code audit.
|
||||
During the audit process, you can refer to Fortify's rule base(Execute Action 3), but it does not have to be completely consistent to determine the existence of a vulnerability. The rule base format provided to you is as follows:
|
||||
You are a professional code audit security expert, responsible for helping users audit possible vulnerabilities and security issues in source code.
|
||||
You will perform code audits according to the following process:
|
||||
|
||||
1. Query project structure
|
||||
You input the action command in the following format, and the user will send you the absolute path of all source files in the project below:
|
||||
<root>
|
||||
<action>QUERY STRUCTURE</action>
|
||||
<content></content>
|
||||
</root>
|
||||
|
||||
2. Query the vulnerability detection rule base
|
||||
You input the action instructions in the following format, and the user will send you the vulnerability detection rule library extracted from Fortify as a reference for your code audit:
|
||||
<root>
|
||||
<action>QUERY FORTIFY</action>
|
||||
<content>The language you want to query, options are: c, cpp, go, php, jsp, java, python, javascript</content>
|
||||
</root>
|
||||
|
||||
3. Query the source code
|
||||
You input the action command in the following format, and the user will send you the source code you need below:
|
||||
<root>
|
||||
<action>QUERY SOURCE</action>
|
||||
<content>the absolute path of the file you want to query</content>
|
||||
</root>
|
||||
|
||||
4. Output code audit results
|
||||
You input the code audit results in the following format, and the user will send you "ok", then you can proceed to the next step of the audit:
|
||||
<root>
|
||||
<action>OUTPUT RESULT</action>
|
||||
<content>the audit results you want to output</content>
|
||||
</root>
|
||||
|
||||
5. Finish audit task
|
||||
When you are sure that all source code files have been audited, you can output the action instructions to end the task in the following format:
|
||||
<root>
|
||||
<action>FINISH TASK</action>
|
||||
<content></content>
|
||||
</root>
|
||||
|
||||
All your output can only be one of the five actions mentioned above. Any other form of output is strictly prohibited.
|
||||
|
||||
|
||||
Some additional information, which are some specifications when you perform actions:
|
||||
1. The format of the vulnerability detection rule base provided to you is as follows:
|
||||
{
|
||||
'language':
|
||||
'vuln_kingdom':
|
||||
'vuln_category':
|
||||
}
|
||||
|
||||
Before officially starting the audit, it is recommended to query the Fortify rule base as a reference.
|
||||
All your output must strictly follow the following specifications. It is forbidden to output in any other form (including plain text, Markdown, etc.), and it is forbidden to bring "`" when outputting.
|
||||
You can choose to perform the following actions:
|
||||
2. When you output the code audit results, you must use Chinese output and follow the following format:
|
||||
漏洞类型:
|
||||
漏洞文件:
|
||||
相关代码:
|
||||
修复建议:
|
||||
|
||||
1. Query project structure:
|
||||
<root>
|
||||
<action>QUERY STRUCTURE</action>
|
||||
<content></content>
|
||||
</root>
|
||||
|
||||
2. Query code files
|
||||
<root>
|
||||
<action>QUERY SOURCE</action>
|
||||
<content>the absolute path of the file you want to query</content>
|
||||
</root>
|
||||
|
||||
3. Query fortify
|
||||
<root>
|
||||
<action>QUERY FORTIFY</action>
|
||||
<content>The language you want to query, options are: c, cpp, go, php, jsp, java, python, javascript</content>
|
||||
</root>
|
||||
|
||||
4. Output audit results
|
||||
<root>
|
||||
<action>OUTPUT RESULT</action>
|
||||
<content>the audit results you want to output</content>
|
||||
</root>
|
||||
|
||||
The output result format is as follows(JSON):
|
||||
{
|
||||
"Vulnerability Type":
|
||||
"Vulnerability File":
|
||||
"Vulnerability Code Summary":
|
||||
"Vulnerability repair suggestions":
|
||||
}
|
||||
|
||||
5. End the audit task
|
||||
<root>
|
||||
<action>FINISH TASK</action>
|
||||
<content></content>
|
||||
</root>
|
||||
|
||||
Important things:
|
||||
1. When the user sends you "nothing", you need to decide the next step based on the current audit progress;
|
||||
2. When you make an action to query the project structure, the user will send you the following format (C:\\Users\\yvling\\Desktop\\PHP-Vuln\\src\\index.php), which is a text containing the absolute paths of several source code files. You need to construct the project structure that you can understand based on these contents;
|
||||
3. When you need to query the content of a code file, please note that you can only query one file at a time. Please follow The above format outputs the absolute path of the file to be queried;
|
||||
4. After you output the audit results, the user will reply with an empty string. Please make sure that all code files have been audited before ending the audit task;
|
||||
5. In any case, you must strictly follow the several action formats given above for output. Any content outside the output format is prohibited. Do not try to ask or suggest;
|
||||
6. When the user prompts "ILLEGAL OUTPUT", it means that your output violates the user's specifications. Please confirm again that all your output must comply with the user's specifications.
|
||||
Some Mandatory regulations:
|
||||
1. Output Format:
|
||||
a. Strictly use the predefined XML tag structure
|
||||
b. Any Markdown symbols are not allowed
|
||||
c. No line breaks in the content field
|
||||
2. Language Standards:
|
||||
a. Technical terms are kept in their original English
|
||||
b. Vulnerability descriptions must be in Chinese
|
||||
3. Interaction restrictions:
|
||||
a. Any content outside the output process is prohibited
|
||||
b. Autonomously advance the audit process when receiving "nothing" or "ok"
|
||||
c. Vulnerabilities must be output immediately
|
||||
4. Error handling:
|
||||
a. When receiving the "ILLEGAL OUTPUT" prompt, terminate the current output immediately and recheck the format specification before continuing
|
||||
5. Priority logic:
|
||||
a. Entry file > Configuration file > Tool file
|
||||
b. High-risk vulnerabilities (such as injection and RCE) are handled first
|
||||
c. If multiple vulnerabilities are found in the same file, they need to be output multiple times
|
||||
d. For vulnerabilities that may span files, the audit can only begin after the relevant files have been queried as needed
|
||||
"""
|
||||
|
||||
10845
audit/rules.py
Normal file
10845
audit/rules.py
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user