254 lines
8.7 KiB
Python
254 lines
8.7 KiB
Python
|
"""
|
||
|
This script extracts information from PHP files and converts it to YAML format.
|
||
|
It processes PHP files in a given source directory, extracts various components
|
||
|
such as dependencies, properties, methods, traits, and interfaces, and then
|
||
|
writes the extracted information to YAML files in a specified destination directory.
|
||
|
"""
|
||
|
|
||
|
import os
|
||
|
import re
|
||
|
import yaml
|
||
|
from typing import Dict, List, Any
|
||
|
|
||
|
def parse_php_file(file_path: str) -> Dict[str, Any]:
|
||
|
"""
|
||
|
Parse a PHP file and extract its components.
|
||
|
|
||
|
Args:
|
||
|
file_path (str): Path to the PHP file.
|
||
|
|
||
|
Returns:
|
||
|
Dict[str, Any]: A dictionary containing extracted information:
|
||
|
- name: Name of the PHP file (without extension)
|
||
|
- class_comment: Comment for the class (if any)
|
||
|
- dependencies: List of dependencies (use statements)
|
||
|
- properties: List of class properties
|
||
|
- methods: List of class methods
|
||
|
- traits: List of traits used
|
||
|
- interfaces: List of interfaces implemented
|
||
|
"""
|
||
|
with open(file_path, 'r') as file:
|
||
|
content = file.read()
|
||
|
|
||
|
name = os.path.basename(file_path).split('.')[0]
|
||
|
class_comment = extract_class_comment(content)
|
||
|
dependencies = extract_dependencies(content)
|
||
|
properties = extract_properties(content)
|
||
|
methods = extract_methods(content)
|
||
|
traits = extract_traits(content)
|
||
|
interfaces = extract_interfaces(content)
|
||
|
|
||
|
return {
|
||
|
"name": name,
|
||
|
"class_comment": class_comment,
|
||
|
"dependencies": dependencies,
|
||
|
"properties": properties,
|
||
|
"methods": methods,
|
||
|
"traits": traits,
|
||
|
"interfaces": interfaces
|
||
|
}
|
||
|
|
||
|
def extract_class_comment(content: str) -> str:
|
||
|
"""
|
||
|
Extract the class-level comment from PHP content.
|
||
|
|
||
|
Args:
|
||
|
content (str): PHP file content.
|
||
|
|
||
|
Returns:
|
||
|
str: Extracted class comment or empty string if not found.
|
||
|
"""
|
||
|
class_comment_pattern = r'/\*\*(.*?)\*/\s*class'
|
||
|
match = re.search(class_comment_pattern, content, re.DOTALL)
|
||
|
if match:
|
||
|
return match.group(1).strip()
|
||
|
return ""
|
||
|
|
||
|
def extract_dependencies(content: str) -> List[Dict[str, str]]:
|
||
|
"""
|
||
|
Extract dependencies (use statements) from PHP content.
|
||
|
|
||
|
Args:
|
||
|
content (str): PHP file content.
|
||
|
|
||
|
Returns:
|
||
|
List[Dict[str, str]]: List of dictionaries containing dependency information:
|
||
|
- name: Alias or class name
|
||
|
- type: Always "class" for now (might need refinement)
|
||
|
- source: Full namespace of the dependency
|
||
|
"""
|
||
|
# Regex pattern to match use statements, capturing the full namespace and optional alias
|
||
|
use_statements = re.findall(r'use\s+([\w\\]+)(?:\s+as\s+(\w+))?;', content)
|
||
|
dependencies = []
|
||
|
for use in use_statements:
|
||
|
dep = {
|
||
|
"name": use[1] if use[1] else use[0].split('\\')[-1],
|
||
|
"type": "class", # Assuming class for now, might need refinement
|
||
|
"source": use[0]
|
||
|
}
|
||
|
dependencies.append(dep)
|
||
|
return dependencies
|
||
|
|
||
|
def extract_properties(content: str) -> List[Dict[str, Any]]:
|
||
|
"""
|
||
|
Extract class properties and their comments from PHP content.
|
||
|
|
||
|
Args:
|
||
|
content (str): PHP file content.
|
||
|
|
||
|
Returns:
|
||
|
List[Dict[str, Any]]: List of dictionaries containing property information:
|
||
|
- name: Property name (without $)
|
||
|
- visibility: public, protected, or private
|
||
|
- comment: Property comment (if any)
|
||
|
"""
|
||
|
# Regex pattern to match property declarations with optional comments
|
||
|
property_pattern = r'(?:/\*\*(.*?)\*/\s*)?(public|protected|private)\s+(?:static\s+)?(\$\w+)(?:\s*=\s*[^;]+)?;'
|
||
|
properties = re.findall(property_pattern, content, re.DOTALL)
|
||
|
return [
|
||
|
{
|
||
|
"name": prop[2][1:],
|
||
|
"visibility": prop[1],
|
||
|
"comment": prop[0].strip() if prop[0] else None
|
||
|
} for prop in properties
|
||
|
]
|
||
|
|
||
|
def extract_methods(content: str) -> List[Dict[str, Any]]:
|
||
|
"""
|
||
|
Extract class methods and their comments from PHP content.
|
||
|
|
||
|
Args:
|
||
|
content (str): PHP file content.
|
||
|
|
||
|
Returns:
|
||
|
List[Dict[str, Any]]: List of dictionaries containing method information:
|
||
|
- name: Method name
|
||
|
- visibility: public, protected, or private
|
||
|
- parameters: List of parameter dictionaries
|
||
|
- comment: Method comment (if any)
|
||
|
"""
|
||
|
# Regex pattern to match method declarations with optional comments
|
||
|
method_pattern = r'(?:/\*\*(.*?)\*/\s*)?(public|protected|private)\s+(?:static\s+)?function\s+(\w+)\s*\((.*?)\)'
|
||
|
methods = re.findall(method_pattern, content, re.DOTALL)
|
||
|
parsed_methods = []
|
||
|
for method in methods:
|
||
|
parsed_methods.append({
|
||
|
"name": method[2],
|
||
|
"visibility": method[1],
|
||
|
"parameters": parse_parameters(method[3]),
|
||
|
"comment": method[0].strip() if method[0] else None
|
||
|
})
|
||
|
return parsed_methods
|
||
|
|
||
|
def parse_parameters(params_str: str) -> List[Dict[str, str]]:
|
||
|
"""
|
||
|
Parse method parameters from a parameter string.
|
||
|
|
||
|
Args:
|
||
|
params_str (str): String containing method parameters.
|
||
|
|
||
|
Returns:
|
||
|
List[Dict[str, str]]: List of dictionaries containing parameter information:
|
||
|
- name: Parameter name
|
||
|
- default: Default value (if specified)
|
||
|
"""
|
||
|
params = params_str.split(',')
|
||
|
parsed_params = []
|
||
|
for param in params:
|
||
|
param = param.strip()
|
||
|
if param:
|
||
|
parts = param.split('=')
|
||
|
param_dict = {"name": parts[0].split()[-1].strip('$')}
|
||
|
if len(parts) > 1:
|
||
|
param_dict["default"] = parts[1].strip()
|
||
|
parsed_params.append(param_dict)
|
||
|
return parsed_params
|
||
|
|
||
|
def extract_traits(content: str) -> List[str]:
|
||
|
"""
|
||
|
Extract traits used in the PHP content.
|
||
|
|
||
|
Args:
|
||
|
content (str): PHP file content.
|
||
|
|
||
|
Returns:
|
||
|
List[str]: List of trait names used in the class.
|
||
|
"""
|
||
|
return re.findall(r'use\s+([\w\\]+)(?:,\s*[\w\\]+)*;', content)
|
||
|
|
||
|
def extract_interfaces(content: str) -> List[str]:
|
||
|
"""
|
||
|
Extract interfaces implemented by the class in the PHP content.
|
||
|
|
||
|
Args:
|
||
|
content (str): PHP file content.
|
||
|
|
||
|
Returns:
|
||
|
List[str]: List of interface names implemented by the class.
|
||
|
"""
|
||
|
return re.findall(r'implements\s+([\w\\]+)(?:,\s*[\w\\]+)*', content)
|
||
|
|
||
|
def convert_to_yaml(php_data: Dict[str, Any]) -> str:
|
||
|
"""
|
||
|
Convert extracted PHP data to YAML format.
|
||
|
|
||
|
Args:
|
||
|
php_data (Dict[str, Any]): Dictionary containing extracted PHP data.
|
||
|
|
||
|
Returns:
|
||
|
str: YAML representation of the PHP data.
|
||
|
"""
|
||
|
def format_comment(comment: str) -> str:
|
||
|
return '\n'.join('# ' + line.strip() for line in comment.split('\n'))
|
||
|
|
||
|
formatted_data = {}
|
||
|
for key, value in php_data.items():
|
||
|
if key == 'class_comment':
|
||
|
formatted_data['class_comment'] = format_comment(value) if value else None
|
||
|
elif key == 'properties':
|
||
|
formatted_data['properties'] = [
|
||
|
{**prop, 'comment': format_comment(prop['comment']) if prop['comment'] else None}
|
||
|
for prop in value
|
||
|
]
|
||
|
elif key == 'methods':
|
||
|
formatted_data['methods'] = [
|
||
|
{**method, 'comment': format_comment(method['comment']) if method.get('comment') else None}
|
||
|
for method in value
|
||
|
]
|
||
|
else:
|
||
|
formatted_data[key] = value
|
||
|
|
||
|
return yaml.dump(formatted_data, sort_keys=False, default_flow_style=False)
|
||
|
|
||
|
def process_directory(source_dir: str, dest_dir: str):
|
||
|
"""
|
||
|
Process all PHP files in the source directory and its subdirectories,
|
||
|
extract information, and save as YAML files in the destination directory.
|
||
|
|
||
|
Args:
|
||
|
source_dir (str): Path to the source directory containing PHP files.
|
||
|
dest_dir (str): Path to the destination directory for YAML files.
|
||
|
"""
|
||
|
for root, dirs, files in os.walk(source_dir):
|
||
|
for file in files:
|
||
|
if file.endswith('.php'):
|
||
|
source_path = os.path.join(root, file)
|
||
|
relative_path = os.path.relpath(source_path, source_dir)
|
||
|
dest_path = os.path.join(dest_dir, os.path.dirname(relative_path))
|
||
|
|
||
|
os.makedirs(dest_path, exist_ok=True)
|
||
|
|
||
|
php_data = parse_php_file(source_path)
|
||
|
yaml_content = convert_to_yaml(php_data)
|
||
|
|
||
|
yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml")
|
||
|
with open(yaml_file, 'w') as f:
|
||
|
f.write(yaml_content)
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
source_directory = "/home/platform/Devboxes/resources/symfony/src/Symfony/"
|
||
|
destination_directory = "/home/platform/Devboxes/platform/api2/"
|
||
|
|
||
|
process_directory(source_directory, destination_directory)
|
||
|
print("Extraction and conversion completed.")
|