Introducing the Fortify rule library

2025-09-16 14:55:50 +08:00 · 2025-02-09 14:28:47 +08:00
parent 6a0cab65d1
commit aee63534a6
8 changed files with 10933 additions and 15 deletions
--- a/audit/init.py
+++ b/audit/init.py
@@ -1,8 +1,6 @@
 import os
 import re
-import time
 import uuid
-import tiktoken
 import xml.etree.ElementTree as ET
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_core.messages import SystemMessage
@@ -20,17 +18,18 @@ from audit import callback
 from audit.prompt import SYSTEM_PROMPT
 from audit.language import LANGUAGE

-reasoning_model = 'gemini-2.0-flash-thinking-exp'
+reasoning_model = 'gpt-4o'
 embedding_model = 'text-embedding-3-large'

 xml_pattern = r'<root>.*?</root>'


 class Audit:
-    def __init__(self):
+    def __init__(self, fortify_rules):
        self.raw_chain = None
        self.source_files_list = []
        self.max_token = 4096
+        self.fortify_rules = fortify_rules
        self.chat_history = ChatMessageHistory()
        self.session_id = uuid.uuid4().hex
        self.response_callback = callback.CustomCallbackHandler()
@@ -93,12 +92,18 @@ class Audit:
                    self.log.info(f'Request source code: {content}')
                    input_content = open(content, 'r', encoding='utf-8').read()
                    continue
+                elif action == 'QUERY FORTIFY':
+                    self.log.info(f'Request fortify: {content}')
+                    input_content = '\n'.join(x for x in self.fortify_rules if x == content)
+                    continue
                elif action == 'OUTPUT RESULT':
                    self.log.warning(f'Audit result: \n\n{content}')
                    self.store_messages_in_faiss(content)
                    callback_function(content)  # Callback function, used to obtain results externally
                    input_content = ''
                    continue
+                elif action == 'FINISH TASK':
+                    self.log.info(content)
                else:
                    self.log.critical(f'Unknown action! {action}')
                    break
@@ -126,8 +131,6 @@ class Audit:
            history_messages_key='messages',
        )

-        self.log.debug(f'Chat messages: {input_dict}')
-
        for _ in chain_with_message_history.stream(input_dict, config_dict):
            pass

--- a/audit/language.py
+++ b/audit/language.py
@@ -1,8 +1,9 @@
 LANGUAGE = {
    'c': ['.c'],
-    'c++': ['.cc', '.cpp'],
+    'cpp': ['.cc', '.cpp'],
    'go': ['.go'],
    'php': ['php', 'php3', 'php4', 'php5', 'phtml'],
+    'jsp': ['.jsp'],
    'java': ['.java'],
    'python': ['.py'],
    'javascript': ['.js'],
--- a/audit/prompt.py
+++ b/audit/prompt.py
@@ -1,7 +1,15 @@
 SYSTEM_PROMPT = """
 You are an intelligent code auditor. I will provide you with a source code. Please strictly follow the following requirements to conduct code audit.
-All your output must strictly follow the following specifications. It is forbidden to output in any other form (including plain text, Markdown, etc.). 
-and it is forbidden to bring "`" when outputting:
+During the audit process, you can refer to Fortify's rule base(Execute Action 3), but it does not have to be completely consistent to determine the existence of a vulnerability. The rule base format provided to you is as follows:
+{
+    'language':
+    'vuln_kingdom':
+    'vuln_category':
+}
+
+Before officially starting the audit, it is recommended to query the Fortify rule base as a reference.
+All your output must strictly follow the following specifications. It is forbidden to output in any other form (including plain text, Markdown, etc.), and it is forbidden to bring "`" when outputting.
+You can choose to perform the following actions:

 1. Query project structure:
 <root>
@@ -15,13 +23,27 @@ and it is forbidden to bring "`" when outputting:
 <content>the absolute path of the file you want to query</content>
 </root>

-3. Output audit results
+3. Query fortify
+<root>
+<action>QUERY FORTIFY</action>
+<content>The language you want to query, options are: c, cpp, go, php, jsp, java, python, javascript</content>
+</root>
+
+4. Output audit results
 <root>
 <action>OUTPUT RESULT</action>
 <content>the audit results you want to output</content>
 </root>

-4. End the audit task
+The output result format is as follows(JSON):
+{
+    "Vulnerability Type": 
+    "Vulnerability File": 
+    "Vulnerability Code Summary": 
+    "Vulnerability repair suggestions":
+} 
+
+5. End the audit task
 <root>
 <action>FINISH TASK</action>
 <content></content>
--- a/fortify_rules.json
+++ b/fortify_rules.json
--- a/main.py
+++ b/main.py
@@ -1,3 +1,4 @@
+import json
 import os
 import warnings
 from audit import Audit
@@ -6,7 +7,9 @@ from audit import Audit
 warnings.simplefilter('ignore', FutureWarning)

 os.environ['OPENAI_API_BASE'] = 'https://yunwu.ai/v1'
-os.environ['OPENAI_API_KEY'] = 'sk-zpkHfWT0Zhvzc79lX11WS4dEyg5CkQ3RdZOSNDoLADaitfVM'
+os.environ['OPENAI_API_KEY'] = 'sk-FdKVL1IiRCMhTVScD4iIEfE2U7978rKuAQhPl0Gbr55l6fDD'
+
+fortify_rules = json.load(open('fortify_rules.json', 'r', encoding='utf-8'))


 def result_callback(result):
@@ -14,10 +17,10 @@ def result_callback(result):


 if __name__ == '__main__':
-    src_root = r'C:\Users\yvling\Desktop\JavaSecLab'
-    language = 'java'
+    src_root = r'C:\Users\yvling\Desktop\PHP-Vuln'
+    language = 'php'

-    audit = Audit()
+    audit = Audit(fortify_rules)
    audit.load_source_files(src_root, language)
    audit.audit(result_callback)

--- a/rules/init.py
+++ b/rules/init.py
--- a/rules/fortify/init.py
+++ b/rules/fortify/init.py
--- a/rules/fortify/fortify.py
+++ b/rules/fortify/fortify.py
@@ -0,0 +1,44 @@
+import json
+import os
+import xml.etree.ElementTree as ET
+
+
+rules_list = []
+
+def extract_rules(xml_file):
+    """Collect the structural rules of one Fortify rule-pack XML file.
+
+    Every ``StructuralRule`` element whose ``language`` attribute is one of
+    the supported languages is converted to a dict with the keys
+    ``language``, ``vuln_kingdom``, ``vuln_category``, ``vuln_subcategory``
+    and ``predicate`` and appended to the module-level ``rules_list``.
+    Rules for any other language are skipped.
+
+    Args:
+        xml_file: Path (or file object) accepted by ``ET.parse``.
+
+    Returns:
+        None. Results are accumulated in ``rules_list``.
+    """
+    namespace = '{xmlns://www.fortifysoftware.com/schema/rules}'
+    supported_languages = ('c', 'cpp', 'go', 'php', 'jsp', 'java', 'python', 'javascript')
+
+    def child_text(rule, tag):
+        # A missing child and an empty child (``text is None``) both map to
+        # None; the original crashed with AttributeError on the empty case.
+        node = rule.find(namespace + tag)
+        if node is None or node.text is None:
+            return None
+        return node.text.replace('        ', ' ')
+
+    root = ET.parse(xml_file).getroot()
+
+    for rule in root.findall('.//' + namespace + 'StructuralRule'):
+        language = rule.get('language')
+        # Filter first so child lookups only run for rules that are kept.
+        if language not in supported_languages:
+            continue
+
+        rules_list.append({
+            'language': language,
+            'vuln_kingdom': child_text(rule, 'VulnKingdom'),
+            'vuln_category': child_text(rule, 'VulnCategory'),
+            'vuln_subcategory': child_text(rule, 'VulnSubcategory'),
+            'predicate': child_text(rule, 'Predicate'),
+        })
+
+
+
+def load_fortify_rules(src_path):
+    """Walk ``src_path`` and dump every extracted Fortify rule to JSON.
+
+    Each ``.xml`` file found under ``src_path`` (recursively) is passed to
+    ``extract_rules``, which appends to the module-level ``rules_list``.
+    The accumulated list is then written to ``../../fortify_rules.json``,
+    resolved against the current working directory.
+
+    Args:
+        src_path: Root directory containing the Fortify rule-pack XML files.
+    """
+    for root, _dirs, files in os.walk(src_path):
+        for file_name in files:
+            if file_name.endswith('.xml'):
+                extract_rules(os.path.join(root, file_name))
+
+    # Use a context manager so the handle is flushed and closed deterministically
+    # instead of relying on garbage collection of an anonymous file object.
+    with open('../../fortify_rules.json', 'w', encoding='utf-8') as output_file:
+        output_file.write(json.dumps(rules_list))
+
+if __name__ == '__main__':
+    import sys
+
+    # The rule-pack directory may be given as the first command-line argument;
+    # without one, fall back to the path that was previously hard-coded.
+    load_fortify_rules(sys.argv[1] if len(sys.argv) > 1 else r'C:\Users\yvling\Desktop\data')