Introducing the Fortify rule library

This commit is contained in:
2025-02-09 14:28:47 +08:00
parent 6a0cab65d1
commit aee63534a6
8 changed files with 10933 additions and 15 deletions

View File

@@ -1,8 +1,6 @@
import os
import re
import time
import uuid
import tiktoken
import xml.etree.ElementTree as ET
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.messages import SystemMessage
@@ -20,17 +18,18 @@ from audit import callback
from audit.prompt import SYSTEM_PROMPT
from audit.language import LANGUAGE
reasoning_model = 'gemini-2.0-flash-thinking-exp'
reasoning_model = 'gpt-4o'
embedding_model = 'text-embedding-3-large'
xml_pattern = r'<root>.*?</root>'
class Audit:
def __init__(self):
def __init__(self, fortify_rules):
self.raw_chain = None
self.source_files_list = []
self.max_token = 4096
self.fortify_rules = fortify_rules
self.chat_history = ChatMessageHistory()
self.session_id = uuid.uuid4().hex
self.response_callback = callback.CustomCallbackHandler()
@@ -93,12 +92,18 @@ class Audit:
self.log.info(f'Request source code: {content}')
input_content = open(content, 'r', encoding='utf-8').read()
continue
elif action == 'QUERY FORTIFY':
self.log.info(f'Request fortify: {content}')
input_content = '\n'.join(x for x in self.fortify_rules if x == content)
continue
elif action == 'OUTPUT RESULT':
self.log.warning(f'Audit result: \n\n{content}')
self.store_messages_in_faiss(content)
callback_function(content) # Callback function, used to obtain results externally
input_content = ''
continue
elif action == 'FINISH TASK':
self.log.info(content)
else:
self.log.critical(f'Unknown action! {action}')
break
@@ -126,8 +131,6 @@ class Audit:
history_messages_key='messages',
)
self.log.debug(f'Chat messages: {input_dict}')
for _ in chain_with_message_history.stream(input_dict, config_dict):
pass

View File

@@ -1,8 +1,9 @@
LANGUAGE = {
'c': ['.c'],
'c++': ['.cc', '.cpp'],
'cpp': ['.cc', '.cpp'],
'go': ['.go'],
'php': ['php', 'php3', 'php4', 'php5', 'phtml'],
'jsp': ['.jsp'],
'java': ['.java'],
'python': ['.py'],
'javascript': ['.js'],

View File

@@ -1,7 +1,15 @@
SYSTEM_PROMPT = """
You are an intelligent code auditor. I will provide you with a source code. Please strictly follow the following requirements to conduct code audit.
All your output must strictly follow the following specifications. It is forbidden to output in any other form (including plain text, Markdown, etc.).
and it is forbidden to bring "`" when outputting:
During the audit process, you can refer to Fortify's rule base(Execute Action 3), but it does not have to be completely consistent to determine the existence of a vulnerability. The rule base format provided to you is as follows:
{
'language':
'vuln_kingdom':
'vuln_category':
}
Before officially starting the audit, it is recommended to query the Fortify rule base as a reference.
All your output must strictly follow the following specifications. It is forbidden to output in any other form (including plain text, Markdown, etc.), and it is forbidden to bring "`" when outputting.
You can choose to perform the following actions:
1. Query project structure:
<root>
@@ -15,13 +23,27 @@ and it is forbidden to bring "`" when outputting:
<content>the absolute path of the file you want to query</content>
</root>
3. Output audit results
3. Query fortify
<root>
<action>QUERY FORTIFY</action>
<content>The language you want to query, options are: c, cpp, go, php, jsp, java, python, javascript</content>
</root>
4. Output audit results
<root>
<action>OUTPUT RESULT</action>
<content>the audit results you want to output</content>
</root>
4. End the audit task
The output result format is as follows(JSON):
{
"Vulnerability Type":
"Vulnerability File":
"Vulnerability Code Summary":
"Vulnerability repair suggestions":
}
5. End the audit task
<root>
<action>FINISH TASK</action>
<content></content>

10845
fortify_rules.json Normal file

File diff suppressed because one or more lines are too long

11
main.py
View File

@@ -1,3 +1,4 @@
import json
import os
import warnings
from audit import Audit
@@ -6,7 +7,9 @@ from audit import Audit
warnings.simplefilter('ignore', FutureWarning)
os.environ['OPENAI_API_BASE'] = 'https://yunwu.ai/v1'
os.environ['OPENAI_API_KEY'] = 'sk-zpkHfWT0Zhvzc79lX11WS4dEyg5CkQ3RdZOSNDoLADaitfVM'
os.environ['OPENAI_API_KEY'] = 'sk-FdKVL1IiRCMhTVScD4iIEfE2U7978rKuAQhPl0Gbr55l6fDD'
fortify_rules = json.load(open('fortify_rules.json', 'r', encoding='utf-8'))
def result_callback(result):
@@ -14,10 +17,10 @@ def result_callback(result):
if __name__ == '__main__':
src_root = r'C:\Users\yvling\Desktop\JavaSecLab'
language = 'java'
src_root = r'C:\Users\yvling\Desktop\PHP-Vuln'
language = 'php'
audit = Audit()
audit = Audit(fortify_rules)
audit.load_source_files(src_root, language)
audit.audit(result_callback)

0
rules/__init__.py Normal file
View File

View File

44
rules/fortify/fortify.py Normal file
View File

@@ -0,0 +1,44 @@
import json
import os
import xml.etree.ElementTree as ET
# Accumulates every rule collected by extract_rules(); load_fortify_rules()
# later serialises this list to JSON.
rules_list = []

# XML namespace that qualifies every element looked up below.
_FORTIFY_NS = '{xmlns://www.fortifysoftware.com/schema/rules}'

# Only rules whose `language` attribute is one of these are kept.
_SUPPORTED_LANGUAGES = ('c', 'cpp', 'go', 'php', 'jsp', 'java', 'python', 'javascript')

# (child element name, key used in the resulting rule dict), in output order.
_RULE_FIELDS = (
    ('VulnKingdom', 'vuln_kingdom'),
    ('VulnCategory', 'vuln_category'),
    ('VulnSubcategory', 'vuln_subcategory'),
    ('Predicate', 'predicate'),
)


def _element_text(element):
    """Return the text of *element*, or None if it is missing or empty."""
    # `element.text` is None for an element that is present but has no text
    # (e.g. `<VulnSubcategory/>`); calling `.replace` on it would raise.
    if element is None or element.text is None:
        return None
    # NOTE(review): as written this replace maps a space to a space; the
    # intended source character (double space? non-breaking space?) may have
    # been normalised somewhere along the way - confirm against the repository.
    return element.text.replace(' ', ' ')


def extract_rules(xml_file):
    """Collect the StructuralRule entries of one XML file into `rules_list`.

    Every `StructuralRule` element below the document root whose `language`
    attribute is one of the supported languages is turned into a dict with
    the keys `language`, `vuln_kingdom`, `vuln_category`, `vuln_subcategory`
    and `predicate`, and appended to the module-level `rules_list`. A field
    is None when its child element is absent or has no text. Rules for any
    other language are skipped.

    Args:
        xml_file: A path or file object accepted by `ET.parse`.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_file).getroot()
    for rule in root.findall(f'.//{_FORTIFY_NS}StructuralRule'):
        language = rule.get('language')
        if language not in _SUPPORTED_LANGUAGES:
            continue
        rule_info = {'language': language}
        for tag, key in _RULE_FIELDS:
            rule_info[key] = _element_text(rule.find(f'{_FORTIFY_NS}{tag}'))
        rules_list.append(rule_info)
def load_fortify_rules(src_path, output_path='../../fortify_rules.json'):
    """Extract rules from every XML file under *src_path* and dump them as JSON.

    Walks *src_path* recursively, passes each file whose name ends in `.xml`
    to `extract_rules` (which appends to the module-level `rules_list`), then
    writes the whole of `rules_list` to *output_path* as a JSON document.

    Args:
        src_path: Directory to search for XML rule files.
        output_path: Destination of the JSON file. The default is a relative
            path, so it is resolved against the current working directory.
    """
    for root, _dirs, files in os.walk(src_path):
        for file_name in files:
            if file_name.endswith('.xml'):
                extract_rules(os.path.join(root, file_name))
    # A context manager guarantees the file is flushed and closed even if
    # serialisation or the write fails part-way.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write(json.dumps(rules_list))
if __name__ == '__main__':
    # Script entry point: the rule directory may be given as the first
    # command-line argument; without one, the original hard-coded location
    # is used so existing invocations keep working.
    import sys

    load_fortify_rules(sys.argv[1] if len(sys.argv) > 1 else r'C:\Users\yvling\Desktop\data')