Introducing the Fortify rule library

2025-09-16 14:55:50 +08:00 · 2025-02-09 14:28:47 +08:00
parent 6a0cab65d1
commit aee63534a6
8 changed files with 10933 additions and 15 deletions
--- a/audit/init.py
+++ b/audit/init.py
@@ -1,8 +1,6 @@
 import os
 import re
 import time
 import uuid
 import tiktoken
 import xml.etree.ElementTree as ET
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_core.messages import SystemMessage
@@ -20,17 +18,18 @@ from audit import callback
 from audit.prompt import SYSTEM_PROMPT
 from audit.language import LANGUAGE
-reasoning_model = 'gemini-2.0-flash-thinking-exp'
+reasoning_model = 'gpt-4o'
 embedding_model = 'text-embedding-3-large'
 xml_pattern = r'<root>.*?</root>'
 class Audit:
-    def __init__(self):
+    def __init__(self, fortify_rules):
        self.raw_chain = None
        self.source_files_list = []
        self.max_token = 4096
        self.fortify_rules = fortify_rules
        self.chat_history = ChatMessageHistory()
        self.session_id = uuid.uuid4().hex
        self.response_callback = callback.CustomCallbackHandler()
@@ -93,12 +92,18 @@ class Audit:
                    self.log.info(f'Request source code: {content}')
                    input_content = open(content, 'r', encoding='utf-8').read()
                    continue
                elif action == 'QUERY FORTIFY':
                    self.log.info(f'Request fortify: {content}')
                    input_content = '\n'.join(x for x in self.fortify_rules if x == content)
                    continue
                elif action == 'OUTPUT RESULT':
                    self.log.warning(f'Audit result: \n\n{content}')
                    self.store_messages_in_faiss(content)
                    callback_function(content)  # Callback function, used to obtain results externally
                    input_content = ''
                    continue
                elif action == 'FINISH TASK':
                    self.log.info(content)
                else:
                    self.log.critical(f'Unknown action! {action}')
                    break
@@ -126,8 +131,6 @@ class Audit:
            history_messages_key='messages',
        )
        self.log.debug(f'Chat messages: {input_dict}')
        for _ in chain_with_message_history.stream(input_dict, config_dict):
            pass
--- a/audit/language.py
+++ b/audit/language.py
@@ -1,8 +1,9 @@
 LANGUAGE = {
    'c': ['.c'],
-    'c++': ['.cc', '.cpp'],
+    'cpp': ['.cc', '.cpp'],
    'go': ['.go'],
    'php': ['php', 'php3', 'php4', 'php5', 'phtml'],
    'jsp': ['.jsp'],
    'java': ['.java'],
    'python': ['.py'],
    'javascript': ['.js'],
--- a/audit/prompt.py
+++ b/audit/prompt.py
@@ -1,7 +1,15 @@
 SYSTEM_PROMPT = """
 You are an intelligent code auditor. I will provide you with a source code. Please strictly follow the following requirements to conduct code audit.
-All your output must strictly follow the following specifications. It is forbidden to output in any other form (including plain text, Markdown, etc.). 
+During the audit process, you can refer to Fortify's rule base(Execute Action 3), but it does not have to be completely consistent to determine the existence of a vulnerability. The rule base format provided to you is as follows:
-and it is forbidden to bring "`" when outputting:
+{
    'language':
    'vuln_kingdom':
    'vuln_category':
 }
 Before officially starting the audit, it is recommended to query the Fortify rule base as a reference.
 All your output must strictly follow the following specifications. It is forbidden to output in any other form (including plain text, Markdown, etc.), and it is forbidden to bring "`" when outputting.
 You can choose to perform the following actions:
 1. Query project structure:
 <root>
@@ -15,13 +23,27 @@ and it is forbidden to bring "`" when outputting:
 <content>the absolute path of the file you want to query</content>
 </root>
-3. Output audit results
+3. Query fortify
 <root>
 <action>QUERY FORTIFY</action>
 <content>The language you want to query, options are: c, cpp, go, php, jsp, java, python, javascript</content>
 </root>
 4. Output audit results
 <root>
 <action>OUTPUT RESULT</action>
 <content>the audit results you want to output</content>
 </root>
-4. End the audit task
+The output result format is as follows(JSON):
 {
    "Vulnerability Type": 
    "Vulnerability File": 
    "Vulnerability Code Summary": 
    "Vulnerability repair suggestions":
 } 
 5. End the audit task
 <root>
 <action>FINISH TASK</action>
 <content></content>
--- a/fortify_rules.json
+++ b/fortify_rules.json
--- a/main.py
+++ b/main.py
@@ -1,3 +1,4 @@
 import json
 import os
 import warnings
 from audit import Audit
@@ -6,7 +7,9 @@ from audit import Audit
 warnings.simplefilter('ignore', FutureWarning)
 os.environ['OPENAI_API_BASE'] = 'https://yunwu.ai/v1'
-os.environ['OPENAI_API_KEY'] = 'sk-zpkHfWT0Zhvzc79lX11WS4dEyg5CkQ3RdZOSNDoLADaitfVM'
+os.environ['OPENAI_API_KEY'] = 'sk-FdKVL1IiRCMhTVScD4iIEfE2U7978rKuAQhPl0Gbr55l6fDD'
 fortify_rules = json.load(open('fortify_rules.json', 'r', encoding='utf-8'))
 def result_callback(result):
@@ -14,10 +17,10 @@ def result_callback(result):
 if __name__ == '__main__':
-    src_root = r'C:\Users\yvling\Desktop\JavaSecLab'
+    src_root = r'C:\Users\yvling\Desktop\PHP-Vuln'
-    language = 'java'
+    language = 'php'
-    audit = Audit()
+    audit = Audit(fortify_rules)
    audit.load_source_files(src_root, language)
    audit.audit(result_callback)
--- a/rules/init.py
+++ b/rules/init.py
--- a/rules/fortify/init.py
+++ b/rules/fortify/init.py
--- a/rules/fortify/fortify.py
+++ b/rules/fortify/fortify.py
@@ -0,0 +1,44 @@
 import json
 import os
 import xml.etree.ElementTree as ET
 # Accumulates every rule extracted so far; load_fortify_rules() serialises it to JSON.
 rules_list = []
 # Namespace prefix, in ElementTree's "{uri}tag" form, of the elements looked up below.
 _FORTIFY_NS = '{xmlns://www.fortifysoftware.com/schema/rules}'
 # Only rules whose "language" attribute is one of these are kept.
 _SUPPORTED_LANGUAGES = frozenset(('c', 'cpp', 'go', 'php', 'jsp', 'java', 'python', 'javascript'))
 # (output key, child tag) pairs copied from each StructuralRule, in output order.
 _RULE_FIELDS = (
    ('vuln_kingdom', 'VulnKingdom'),
    ('vuln_category', 'VulnCategory'),
    ('vuln_subcategory', 'VulnSubcategory'),
    ('predicate', 'Predicate'),
 )
 def _clean_text(element):
    """Return the element's text with 8-space runs replaced by one space, or None."""
    # An element that is present but empty (e.g. <Predicate/>) has text None;
    # treat it like a missing element instead of raising AttributeError.
    if element is None or element.text is None:
        return None
    return element.text.replace('        ', ' ')
 def extract_rules(xml_file):
    """Extract the supported-language StructuralRule entries from one XML file.
    Each kept rule becomes a dict with the keys 'language', 'vuln_kingdom',
    'vuln_category', 'vuln_subcategory' and 'predicate'; a field whose child
    element is missing or empty is stored as None. Rules whose 'language'
    attribute is absent or not in the supported set are ignored.
    Args:
        xml_file: Path (or file object) accepted by ElementTree.parse.
    Returns:
        The list of rule dicts extracted from this file. The same dicts are
        also appended to the module-level rules_list.
    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    tree = ET.parse(xml_file)
    extracted = []
    for rule in tree.getroot().findall(f'.//{_FORTIFY_NS}StructuralRule'):
        language = rule.get('language')
        if language not in _SUPPORTED_LANGUAGES:
            continue
        rule_info = {'language': language}
        for key, tag in _RULE_FIELDS:
            # find() only searches direct children of the rule element.
            rule_info[key] = _clean_text(rule.find(_FORTIFY_NS + tag))
        extracted.append(rule_info)
    rules_list.extend(extracted)
    return extracted
 def load_fortify_rules(src_path):
    """Extract rules from every .xml file under src_path and dump them to JSON.
    Walks src_path recursively, passes each file whose name ends in '.xml' to
    extract_rules(), then writes the module-level rules_list as JSON to
    '../../fortify_rules.json'. That path is resolved against the current
    working directory, not against this file's location.
    NOTE: rules_list is not cleared here, so rules gathered by earlier calls
    in the same process are written again.
    Args:
        src_path: Root directory to search for XML files.
    """
    for current_dir, _dirs, files in os.walk(src_path):
        for file_name in files:
            if file_name.endswith('.xml'):
                extract_rules(os.path.join(current_dir, file_name))
    # Context manager guarantees the file is flushed and closed, even on error.
    with open('../../fortify_rules.json', 'w', encoding='utf-8') as output:
        output.write(json.dumps(rules_list))
 if __name__ == '__main__':
    import sys
    # Rule-pack directory: first command-line argument when given, otherwise
    # the original hard-coded location (kept so existing usage is unchanged).
    default_src_path = r'C:\Users\yvling\Desktop\data'
    load_fortify_rules(sys.argv[1] if len(sys.argv) > 1 else default_src_path)