add: adding scripts for API extraction across various languages
This commit is contained in:
parent
d794603fb1
commit
f090f59582
11 changed files with 1644 additions and 0 deletions
175
api/extract_csharp_to_yaml.py
Normal file
175
api/extract_csharp_to_yaml.py
Normal file
|
@ -0,0 +1,175 @@
|
|||
"""
|
||||
This script extracts information from C# files and converts it to YAML format.
|
||||
It processes C# files in a given source directory, extracts various components
|
||||
such as namespaces, classes, properties, methods, and interfaces, and then
|
||||
writes the extracted information to YAML files in a specified destination directory.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_csharp_file(file_path: str) -> Dict[str, Any]:
    """
    Read one C# source file and gather everything the extractors find in it.

    Args:
        file_path (str): Path to the C# file.

    Returns:
        Dict[str, Any]: Mapping with the following keys, in this order:
            - name: Base file name up to (not including) its first '.'
            - namespace: Result of extract_namespace
            - class_comment: Result of extract_class_comment
            - using_statements: Result of extract_using_statements
            - properties: Result of extract_properties
            - methods: Result of extract_methods
            - interfaces: Result of extract_interfaces
    """
    with open(file_path, 'r') as source:
        text = source.read()

    base_name = os.path.basename(file_path)

    # Dict literals evaluate top to bottom, so the extractors run in the
    # same order as the keys are listed.
    return {
        "name": base_name.split('.')[0],
        "namespace": extract_namespace(text),
        "class_comment": extract_class_comment(text),
        "using_statements": extract_using_statements(text),
        "properties": extract_properties(text),
        "methods": extract_methods(text),
        "interfaces": extract_interfaces(text)
    }
|
||||
|
||||
def extract_namespace(content: str) -> str:
    """
    Return the first namespace named in the C# text.

    Args:
        content (str): Full text of a C# source file.

    Returns:
        str: The dotted name following the first ``namespace`` keyword,
        or an empty string when none is found.
    """
    found = re.search(r'namespace\s+([\w.]+)', content)
    if found is None:
        return ""
    return found.group(1)
|
||||
|
||||
def extract_class_comment(content: str) -> str:
    """
    Extract the ``/** ... */`` comment that directly precedes the class.

    Only a comment block that is immediately followed (apart from
    whitespace) by an optional ``public``/``internal`` and the ``class``
    keyword is returned.

    Args:
        content (str): Full text of a C# source file.

    Returns:
        str: The comment text between ``/**`` and ``*/``, stripped of
        surrounding whitespace, or an empty string when no such comment exists.
    """
    # The comment body is matched one character at a time, refusing to step
    # over a "*/".  A plain lazy ".*?" under DOTALL would start at the first
    # "/**" in the file and run across unrelated comments and code until it
    # reached the class.
    class_comment_pattern = (
        r'/\*\*((?:(?!\*/).)*)\*/\s*(?:public|internal)?\s*class'
    )
    match = re.search(class_comment_pattern, content, re.DOTALL)
    return match.group(1).strip() if match else ""
|
||||
|
||||
def extract_using_statements(content: str) -> List[str]:
    """
    List the names imported through ``using <name>;`` directives.

    Args:
        content (str): Full text of a C# source file.

    Returns:
        List[str]: The dotted names, in order of appearance.
    """
    using_regex = re.compile(r'using\s+([\w.]+);')
    return using_regex.findall(content)
|
||||
|
||||
def extract_properties(content: str) -> List[Dict[str, Any]]:
    """
    Extract auto-properties (``{ get; set; }``) and their doc comments.

    A property is recognised as
    ``<visibility> [virtual] <type> <name> { get; set; }``.  A ``/** ... */``
    block is attached as its comment only when it sits directly in front of
    the property.

    Args:
        content (str): Full text of a C# source file.

    Returns:
        List[Dict[str, Any]]: One dict per property with the keys ``name``,
        ``type``, ``visibility`` and ``comment`` (``None`` when absent).
    """
    # The comment body may not step over "*/"; a lazy ".*?" under DOTALL would
    # glue an earlier comment (for example the class comment) and all the code
    # in between onto the first documented property.
    doc_comment = r'/\*\*((?:(?!\*/).)*)\*/'
    property_pattern = (
        r'(?:' + doc_comment + r'\s*)?'
        r'(public|private|protected|internal)\s+(?:virtual\s+)?'
        r'(\w+)\s+(\w+)\s*\{\s*get;\s*set;\s*\}'
    )
    return [
        {
            "name": name,
            "type": prop_type,
            "visibility": visibility,
            "comment": comment.strip() if comment else None
        }
        for comment, visibility, prop_type, name
        in re.findall(property_pattern, content, re.DOTALL)
    ]
|
||||
|
||||
def extract_methods(content: str) -> List[Dict[str, Any]]:
    """
    Extract method signatures and their doc comments from C# content.

    A method is recognised as
    ``<visibility> [virtual] <return type> <name>(<parameters>)``.  A
    ``/** ... */`` block is attached as its comment only when it sits
    directly in front of the method.

    Args:
        content (str): Full text of a C# source file.

    Returns:
        List[Dict[str, Any]]: One dict per method with the keys ``name``,
        ``return_type``, ``visibility``, ``parameters`` (as produced by
        ``parse_parameters``) and ``comment`` (``None`` when absent).
    """
    # The comment body may not step over "*/"; a lazy ".*?" under DOTALL would
    # let the capture begin at an earlier, unrelated comment.
    doc_comment = r'/\*\*((?:(?!\*/).)*)\*/'
    method_pattern = (
        r'(?:' + doc_comment + r'\s*)?'
        r'(public|private|protected|internal)\s+(?:virtual\s+)?'
        r'(\w+)\s+(\w+)\s*\((.*?)\)'
    )
    parsed_methods = []
    for comment, visibility, return_type, name, params in re.findall(
            method_pattern, content, re.DOTALL):
        parsed_methods.append({
            "name": name,
            "return_type": return_type,
            "visibility": visibility,
            "parameters": parse_parameters(params),
            "comment": comment.strip() if comment else None
        })
    return parsed_methods
|
||||
|
||||
def parse_parameters(params_str: str) -> List[Dict[str, str]]:
    """
    Parse the text between a C# method's parentheses into parameters.

    The last whitespace-separated token of each parameter is taken as its
    name and everything before it as its type, so modifiers such as ``ref``
    or ``out`` stay part of the type.  A default value (``= ...``) is
    ignored.  A parameter consisting of a single token is reported with that
    token as the type and an empty name instead of raising ``IndexError``.

    Args:
        params_str (str): Raw parameter list, e.g. ``"int a, string b"``.

    Returns:
        List[Dict[str, str]]: One ``{"type": ..., "name": ...}`` dict per
        parameter, in declaration order; empty for an empty list.
    """
    # Split on commas that are not nested inside <>, () or [] so that generic
    # types such as Dictionary<string, int> stay in one piece.
    pieces = []
    current = []
    depth = 0
    for char in params_str:
        if char in '<([':
            depth += 1
        elif char in '>)]':
            depth = max(depth - 1, 0)
        if char == ',' and depth == 0:
            pieces.append(''.join(current))
            current = []
        else:
            current.append(char)
    pieces.append(''.join(current))

    parsed_params = []
    for piece in pieces:
        # Drop a default value before looking for the name.
        tokens = piece.split('=', 1)[0].split()
        if not tokens:
            continue
        if len(tokens) == 1:
            parsed_params.append({"type": tokens[0], "name": ""})
        else:
            parsed_params.append(
                {"type": ' '.join(tokens[:-1]), "name": tokens[-1]}
            )
    return parsed_params
|
||||
|
||||
def extract_interfaces(content: str) -> List[str]:
    """
    List the names that follow the ':' of the first class declaration.

    Args:
        content (str): Full text of a C# source file.

    Returns:
        List[str]: The comma-separated names after ``class <Name> :``,
        each stripped of whitespace; empty when no such clause is found.
    """
    found = re.search(r'class\s+\w+\s*:\s*([\w,\s]+)', content)
    if not found:
        return []

    names = []
    for piece in found.group(1).split(','):
        names.append(piece.strip())
    return names
|
||||
|
||||
def convert_to_yaml(csharp_data: Dict[str, Any]) -> str:
    """
    Render the extracted C# data as a YAML document.

    Comment texts (the class comment and the ``comment`` entry of every
    property and method) are rewritten so that each of their lines starts
    with ``# ``; empty or missing comments become ``None``.  All other
    entries are passed through unchanged and key order is preserved.

    Args:
        csharp_data (Dict[str, Any]): Data as returned by parse_csharp_file.

    Returns:
        str: The YAML text produced by ``yaml.dump``.
    """
    def as_hash_lines(text: str) -> str:
        # Prefix every line of the comment with "# ".
        prefixed = []
        for line in text.split('\n'):
            prefixed.append('# ' + line.strip())
        return '\n'.join(prefixed)

    result = {}
    for field, payload in csharp_data.items():
        if field == 'class_comment':
            result['class_comment'] = as_hash_lines(payload) if payload else None
        elif field == 'properties':
            entries = []
            for prop in payload:
                entry = dict(prop)
                entry['comment'] = (
                    as_hash_lines(prop['comment']) if prop['comment'] else None
                )
                entries.append(entry)
            result['properties'] = entries
        elif field == 'methods':
            entries = []
            for method in payload:
                entry = dict(method)
                entry['comment'] = (
                    as_hash_lines(method['comment'])
                    if method.get('comment') else None
                )
                entries.append(entry)
            result['methods'] = entries
        else:
            result[field] = payload

    return yaml.dump(result, sort_keys=False, default_flow_style=False)
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Convert every ``.cs`` file below source_dir into a YAML file.

    The source tree is walked recursively.  For each C# file the matching
    sub-directory is created under dest_dir and the extracted data is
    written there as ``<file name without extension>.yaml``.

    Args:
        source_dir (str): Root directory that is searched for C# files.
        dest_dir (str): Root directory that receives the YAML files.
    """
    for current_dir, _subdirs, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.cs'):
                continue

            input_path = os.path.join(current_dir, filename)
            # Mirror the file's position inside the source tree.
            sub_dir = os.path.dirname(os.path.relpath(input_path, source_dir))
            output_dir = os.path.join(dest_dir, sub_dir)
            os.makedirs(output_dir, exist_ok=True)

            rendered = convert_to_yaml(parse_csharp_file(input_path))

            stem = os.path.splitext(filename)[0]
            output_path = os.path.join(output_dir, f"{stem}.yaml")
            with open(output_path, 'w') as out:
                out.write(rendered)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point.  NOTE(review): both paths look like placeholders
    # that must be edited before the script is run.
    source_directory = "/path/to/csharp/source/directory"
    destination_directory = "/path/to/yaml/destination/directory"

    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
127
api/extract_erlang_to_yaml.py
Normal file
127
api/extract_erlang_to_yaml.py
Normal file
|
@ -0,0 +1,127 @@
|
|||
"""
|
||||
This script extracts information from Erlang files and converts it to YAML format.
|
||||
It processes Erlang files in a given source directory, extracts various components
|
||||
such as module name, exports, imports, records, and functions, and then writes the
|
||||
extracted information to YAML files in a specified destination directory.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_erlang_file(file_path: str) -> Dict[str, Any]:
    """
    Read one Erlang source file and gather everything the extractors find.

    Args:
        file_path (str): Path to the Erlang file.

    Returns:
        Dict[str, Any]: Mapping with the following keys, in this order:
            - name: Base file name up to (not including) its first '.'
            - module: Result of extract_module
            - exports: Result of extract_exports
            - imports: Result of extract_imports
            - records: Result of extract_records
            - functions: Result of extract_functions
    """
    with open(file_path, 'r') as source:
        text = source.read()

    base_name = os.path.basename(file_path)

    # Dict literals evaluate top to bottom, so the extractors run in the
    # same order as the keys are listed.
    return {
        "name": base_name.split('.')[0],
        "module": extract_module(text),
        "exports": extract_exports(text),
        "imports": extract_imports(text),
        "records": extract_records(text),
        "functions": extract_functions(text)
    }
|
||||
|
||||
def extract_module(content: str) -> str:
    """
    Return the argument of the first ``-module(...)`` attribute.

    Args:
        content (str): Full text of an Erlang source file.

    Returns:
        str: The text between the parentheses, or "" when there is no
        ``-module`` attribute.
    """
    found = re.search(r'-module\(([^)]+)\)', content)
    if found is None:
        return ""
    return found.group(1)
|
||||
|
||||
def extract_exports(content: str) -> List[Dict[str, Any]]:
    """
    Collect the functions listed in every ``-export([...])`` attribute.

    Args:
        content (str): Full text of an Erlang source file.

    Returns:
        List[Dict[str, Any]]: The entries produced by
        ``parse_function_exports`` for each export list, concatenated in
        order of appearance.
    """
    export_pattern = r'-export\(\[(.*?)\]\)'
    exports = []
    # DOTALL lets "." match newlines, so an export list that is spread over
    # several lines is found as well.
    for match in re.finditer(export_pattern, content, re.DOTALL):
        exports.extend(parse_function_exports(match.group(1)))
    return exports
|
||||
|
||||
def parse_function_exports(export_str: str) -> List[Dict[str, Any]]:
    """
    Turn a list of ``name/arity`` items into dictionaries.

    Args:
        export_str (str): Text such as ``"start/0, stop/1"``.

    Returns:
        List[Dict[str, Any]]: One ``{"name": str, "arity": int}`` dict per
        ``name/arity`` item found, in order.
    """
    entries = []
    for func_name, arity in re.findall(r'(\w+)/(\d+)', export_str):
        entries.append({"name": func_name, "arity": int(arity)})
    return entries
|
||||
|
||||
def extract_imports(content: str) -> List[Dict[str, Any]]:
    """
    Collect every ``-import(Module, [...])`` attribute.

    Args:
        content (str): Full text of an Erlang source file.

    Returns:
        List[Dict[str, Any]]: One dict per import attribute with the keys
        ``module`` (module name, stripped of surrounding whitespace) and
        ``functions`` (as produced by ``parse_function_exports``).
    """
    import_pattern = r'-import\(([^,]+),\s*\[(.*?)\]\)'
    imports = []
    # DOTALL lets "." match newlines, so a function list that is spread over
    # several lines is found as well.
    for match in re.finditer(import_pattern, content, re.DOTALL):
        imports.append({
            "module": match.group(1).strip(),
            "functions": parse_function_exports(match.group(2))
        })
    return imports
|
||||
|
||||
def extract_records(content: str) -> List[Dict[str, Any]]:
    """
    Collect every ``-record(Name, {...})`` definition.

    Fields are obtained by splitting the text between the braces on commas;
    a field keeps any default value it declares (e.g. ``"age = 0"``).

    Args:
        content (str): Full text of an Erlang source file.

    Returns:
        List[Dict[str, Any]]: One dict per record with the keys ``name`` and
        ``fields`` (list of stripped field texts; empty for ``{}``).
    """
    record_pattern = r'-record\((\w+),\s*\{(.*?)\}\)'
    records = []
    # DOTALL lets "." match newlines, so records written with one field per
    # line are found as well.
    for match in re.finditer(record_pattern, content, re.DOTALL):
        fields = [
            field.strip()
            for field in match.group(2).split(',')
            if field.strip()  # "{}" would otherwise yield one empty field
        ]
        records.append({"name": match.group(1), "fields": fields})
    return records
|
||||
|
||||
def extract_functions(content: str) -> List[Dict[str, Any]]:
    """
    Collect function clauses of the form ``name(Params) -> Body``.

    The body of a clause ends at the next identifier that is immediately
    followed by ``(`` or at the end of the input, so a body that itself
    contains a call is cut off at that call.

    Args:
        content (str): Full text of an Erlang source file.

    Returns:
        List[Dict[str, Any]]: One dict per clause with the keys ``name``,
        ``parameters`` (list of stripped parameter texts; empty for a
        zero-arity function) and ``body`` (stripped body text).
    """
    function_pattern = r'(\w+)\((.*?)\)\s*->(.*?)(?=\w+\(|\Z)'
    functions = []
    for match in re.finditer(function_pattern, content, re.DOTALL):
        params = [
            param.strip()
            for param in match.group(2).split(',')
            if param.strip()  # "()" would otherwise yield one empty parameter
        ]
        functions.append({
            "name": match.group(1),
            "parameters": params,
            "body": match.group(3).strip()
        })
    return functions
|
||||
|
||||
def convert_to_yaml(erlang_data: Dict[str, Any]) -> str:
    """
    Render the extracted Erlang data as a YAML document.

    Args:
        erlang_data (Dict[str, Any]): Data as returned by parse_erlang_file.

    Returns:
        str: The YAML text produced by ``yaml.dump``, with key order
        preserved and block style throughout.
    """
    rendered = yaml.dump(erlang_data, sort_keys=False, default_flow_style=False)
    return rendered
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Convert every ``.erl`` file below source_dir into a YAML file.

    The source tree is walked recursively.  For each Erlang file the
    matching sub-directory is created under dest_dir and the extracted data
    is written there as ``<file name without extension>.yaml``.

    Args:
        source_dir (str): Root directory that is searched for Erlang files.
        dest_dir (str): Root directory that receives the YAML files.
    """
    for current_dir, _subdirs, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.erl'):
                continue

            input_path = os.path.join(current_dir, filename)
            # Mirror the file's position inside the source tree.
            sub_dir = os.path.dirname(os.path.relpath(input_path, source_dir))
            output_dir = os.path.join(dest_dir, sub_dir)
            os.makedirs(output_dir, exist_ok=True)

            rendered = convert_to_yaml(parse_erlang_file(input_path))

            stem = os.path.splitext(filename)[0]
            output_path = os.path.join(output_dir, f"{stem}.yaml")
            with open(output_path, 'w') as out:
                out.write(rendered)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point.  NOTE(review): both paths look like placeholders
    # that must be edited before the script is run.
    source_directory = "/path/to/erlang/source/directory"
    destination_directory = "/path/to/yaml/destination/directory"

    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
198
api/extract_go_to_yaml.py
Normal file
198
api/extract_go_to_yaml.py
Normal file
|
@ -0,0 +1,198 @@
|
|||
"""
|
||||
This script extracts information from Go files and converts it to YAML format.
|
||||
It processes Go files in a given source directory, extracts various components
|
||||
such as imports, structs, interfaces, and functions, and then writes the
|
||||
extracted information to YAML files in a specified destination directory.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_go_file(file_path: str) -> Dict[str, Any]:
    """
    Read one Go source file and gather everything the extractors find in it.

    Args:
        file_path (str): Path to the Go file.

    Returns:
        Dict[str, Any]: Mapping with the following keys, in this order:
            - name: Base file name up to (not including) its first '.'
            - package: Result of extract_package
            - imports: Result of extract_imports
            - structs: Result of extract_structs
            - interfaces: Result of extract_interfaces
            - functions: Result of extract_functions
    """
    with open(file_path, 'r') as source:
        text = source.read()

    base_name = os.path.basename(file_path)

    # Dict literals evaluate top to bottom, so the extractors run in the
    # same order as the keys are listed.
    return {
        "name": base_name.split('.')[0],
        "package": extract_package(text),
        "imports": extract_imports(text),
        "structs": extract_structs(text),
        "interfaces": extract_interfaces(text),
        "functions": extract_functions(text)
    }
|
||||
|
||||
def extract_package(content: str) -> str:
    """
    Return the word following the first ``package`` keyword.

    Args:
        content (str): Full text of a Go source file.

    Returns:
        str: The package name, or "" when no ``package`` clause is found.
    """
    found = re.search(r'package\s+(\w+)', content)
    if found is None:
        return ""
    return found.group(1)
|
||||
|
||||
def extract_imports(content: str) -> List[str]:
    """
    Extract the imported package paths from Go content.

    Both forms of import declaration are handled, and every declaration in
    the file is read (not just the first one):

    - grouped: ``import ( "a" ; alias "b" )``
    - single:  ``import "a"`` or ``import alias "a"``

    A declaration is only recognised when ``import`` is the first word on
    its line.

    Args:
        content (str): Full text of a Go source file.

    Returns:
        List[str]: The quoted import paths, without quotes or aliases, in
        order of appearance.
    """
    import_pattern = (
        r'^[ \t]*import\s*'
        r'(?:\((.*?)\)'                     # 1: body of a grouped import
        r'|(?:[\w.]+[ \t]+)?"([^"]+)")'     # 2: path of a single import
    )
    imports = []
    for match in re.finditer(import_pattern, content, re.DOTALL | re.MULTILINE):
        group_body, single_path = match.groups()
        if group_body is not None:
            imports.extend(re.findall(r'"(.+?)"', group_body))
        else:
            imports.append(single_path)
    return imports
|
||||
|
||||
def extract_structs(content: str) -> List[Dict[str, Any]]:
    """
    Extract ``type <Name> struct { ... }`` definitions from Go content.

    A struct is found whether or not it is documented.  Its comment is the
    run of ``//`` lines directly above the declaration, with the ``//``
    markers removed.  The struct body ends at the first ``}``.

    Args:
        content (str): Full text of a Go source file.

    Returns:
        List[Dict[str, Any]]: One dict per struct with the keys ``name``,
        ``comment`` (``None`` when absent) and ``fields`` (as produced by
        ``extract_struct_fields``).
    """
    # The comment group only accepts whole "//" lines that touch the
    # declaration.  A lazy ".+?" under DOTALL would start at the first "//"
    # in the file and swallow everything up to the struct.
    struct_pattern = (
        r'((?:^[ \t]*//[^\n]*\n)*)'
        r'^[ \t]*type\s+(\w+)\s+struct\s*\{([^}]*)\}'
    )
    structs = []
    for comment_block, name, body in re.findall(
            struct_pattern, content, re.MULTILINE):
        comment_lines = [
            line.strip()[2:].strip() for line in comment_block.splitlines()
        ]
        comment = '\n'.join(comment_lines).strip()
        structs.append({
            "name": name,
            "comment": comment or None,
            "fields": extract_struct_fields(body)
        })
    return structs
|
||||
|
||||
def extract_struct_fields(fields_str: str) -> List[Dict[str, str]]:
    """
    Extract named fields from the body of a Go struct definition.

    The body is read line by line.  Struct tags (back-quoted text) and
    ``//`` comments are ignored, and a line such as ``X, Y float64`` yields
    one field per name.  Lines that do not have the shape
    ``<name>[, <name>...] <type>`` (blank lines, comment lines, embedded
    types) are skipped.

    Args:
        fields_str (str): Text between the braces of a struct definition.

    Returns:
        List[Dict[str, str]]: One ``{"name": ..., "type": ...}`` dict per
        field, in declaration order.
    """
    field_pattern = re.compile(r'(\w+(?:\s*,\s*\w+)*)\s+(\S.*)$')
    fields = []
    for raw_line in fields_str.splitlines():
        # Remove the tag first: a tag may itself contain "//" (e.g. a URL).
        line = re.sub(r'`[^`]*`', '', raw_line)
        line = line.split('//', 1)[0].strip()
        match = field_pattern.match(line)
        if not match:
            continue
        field_type = match.group(2).strip()
        for name in match.group(1).split(','):
            fields.append({"name": name.strip(), "type": field_type})
    return fields
|
||||
|
||||
def extract_interfaces(content: str) -> List[Dict[str, Any]]:
    """
    Extract ``type <Name> interface { ... }`` definitions from Go content.

    An interface is found whether or not it is documented.  Its comment is
    the run of ``//`` lines directly above the declaration, with the ``//``
    markers removed.  The interface body ends at the first ``}``.

    Args:
        content (str): Full text of a Go source file.

    Returns:
        List[Dict[str, Any]]: One dict per interface with the keys ``name``,
        ``comment`` (``None`` when absent) and ``methods`` (as produced by
        ``extract_interface_methods``).
    """
    # The comment group only accepts whole "//" lines that touch the
    # declaration.  A lazy ".+?" under DOTALL would start at the first "//"
    # in the file and swallow everything up to the interface.
    interface_pattern = (
        r'((?:^[ \t]*//[^\n]*\n)*)'
        r'^[ \t]*type\s+(\w+)\s+interface\s*\{([^}]*)\}'
    )
    interfaces = []
    for comment_block, name, body in re.findall(
            interface_pattern, content, re.MULTILINE):
        comment_lines = [
            line.strip()[2:].strip() for line in comment_block.splitlines()
        ]
        comment = '\n'.join(comment_lines).strip()
        interfaces.append({
            "name": name,
            "comment": comment or None,
            "methods": extract_interface_methods(body)
        })
    return interfaces
|
||||
|
||||
def extract_interface_methods(interface_content: str) -> List[Dict[str, Any]]:
    """
    Extract method signatures from the body of a Go interface definition.

    A method is a line of the form ``Name(params) [return type]``, optionally
    followed by a ``//`` comment.  The name must be the first thing on its
    line, so comment lines are never mistaken for methods.

    Args:
        interface_content (str): Text between the braces of an interface.

    Returns:
        List[Dict[str, Any]]: One dict per method with the keys ``name``,
        ``parameters`` (as produced by ``parse_parameters``) and
        ``return_type`` (``None`` when the method returns nothing).
    """
    # Only spaces and tabs may separate the parts of a signature.  "\s*"
    # would also match the line break and pull the following line in as the
    # "return type" of a method that has none.
    method_pattern = r'^[ \t]*(\w+)\((.*?)\)[ \t]*(.*?)(?:[ \t]*//.*)?$'
    methods = re.findall(method_pattern, interface_content, re.MULTILINE)
    return [
        {
            "name": name,
            "parameters": parse_parameters(params),
            "return_type": return_type.strip() if return_type else None
        }
        for name, params, return_type in methods
    ]
|
||||
|
||||
def extract_functions(content: str) -> List[Dict[str, Any]]:
    """
    Extract function and method declarations from Go content.

    Both plain functions (``func Name(...)``) and methods with a receiver
    (``func (r *T) Name(...)``) are found, whether or not they are
    documented.  The comment is the run of ``//`` lines directly above the
    declaration, with the ``//`` markers removed.

    Args:
        content (str): Full text of a Go source file.

    Returns:
        List[Dict[str, Any]]: One dict per declaration with the keys
        ``name``, ``comment`` (``None`` when absent), ``receiver`` (result
        of ``extract_receiver`` on the receiver text, ``{}`` for a plain
        function), ``parameters`` (as produced by ``parse_parameters``) and
        ``return_type`` (``None`` when nothing is declared).
    """
    function_pattern = (
        r'((?:^[ \t]*//[^\n]*\n)*)'   # 1: "//" lines directly above
        r'^[ \t]*func\b\s*'
        r'(?:\(([^)]*)\)\s*)?'        # 2: receiver, only present for methods
        r'(\w+)\s*'                   # 3: function name
        r'\((.*?)\)\s*'               # 4: parameter list
        r'(.*?)\s*\{'                 # 5: return type, up to the body's brace
    )
    functions = []
    for match in re.finditer(function_pattern, content,
                             re.DOTALL | re.MULTILINE):
        comment_block, receiver, name, params, return_type = match.groups()
        comment_lines = [
            line.strip()[2:].strip() for line in comment_block.splitlines()
        ]
        comment = '\n'.join(comment_lines).strip()
        functions.append({
            "name": name,
            "comment": comment or None,
            # The receiver comes from its own parenthesised group; the
            # parameter list never contains it.
            "receiver": extract_receiver(receiver) if receiver else {},
            "parameters": parse_parameters(params),
            "return_type": return_type.strip() if return_type else None
        })
    return functions
|
||||
|
||||
def extract_receiver(params_str: str) -> Dict[str, str]:
    """
    Read a ``name [*]Type`` pair from the start of the given text.

    Args:
        params_str (str): Text expected to begin with a receiver such as
            ``"r *Repo"``.

    Returns:
        Dict[str, str]: ``{"name": ..., "type": ...}`` (the type without a
        leading ``*``), or an empty dict when the text does not start with
        such a pair.
    """
    found = re.match(r'(\w+)\s+\*?(\w+)', params_str)
    if found is None:
        return {}
    receiver_name, receiver_type = found.groups()
    return {"name": receiver_name, "type": receiver_type}
|
||||
|
||||
def parse_parameters(params_str: str) -> List[Dict[str, str]]:
    """
    Parse the text between a Go function's parentheses into parameters.

    Every parameter is reported.  The previous filter that skipped anything
    shaped like ``name Type`` discarded all ordinary parameters.

    - ``a int, b string``: each name gets its own type.
    - ``x, y float64``: names that share a type inherit it from the next
      typed parameter.
    - ``[]byte, int`` (no entry has more than one token): treated as an
      unnamed list, so the name is empty and the token is the type.

    Args:
        params_str (str): Raw parameter list.

    Returns:
        List[Dict[str, str]]: One ``{"name": ..., "type": ...}`` dict per
        parameter, in declaration order; empty for an empty list.
    """
    # Split on commas that are not nested inside (), [] or {} so that
    # function-typed parameters stay in one piece.
    pieces = []
    current = []
    depth = 0
    for char in params_str:
        if char in '([{':
            depth += 1
        elif char in ')]}':
            depth = max(depth - 1, 0)
        if char == ',' and depth == 0:
            pieces.append(''.join(current))
            current = []
        else:
            current.append(char)
    pieces.append(''.join(current))

    entries = [piece.split() for piece in pieces if piece.strip()]
    all_unnamed = all(len(tokens) == 1 for tokens in entries)

    parsed_params = []
    for tokens in entries:
        if all_unnamed:
            parsed_params.append({"name": "", "type": tokens[0]})
        else:
            parsed_params.append(
                {"name": tokens[0], "type": ' '.join(tokens[1:])}
            )

    # Walk backwards so that "x, y float64" gives x the type declared on y.
    pending_type = ""
    for param in reversed(parsed_params):
        if param["type"]:
            pending_type = param["type"]
        else:
            param["type"] = pending_type
    return parsed_params
|
||||
|
||||
def convert_to_yaml(go_data: Dict[str, Any]) -> str:
    """
    Render the extracted Go data as a YAML document.

    For every item under ``structs``, ``interfaces`` and ``functions`` the
    ``comment`` entry is rewritten so that each of its lines starts with
    ``# ``; empty or missing comments become ``None``.  All other entries
    are passed through unchanged and key order is preserved.

    Args:
        go_data (Dict[str, Any]): Data as returned by parse_go_file.

    Returns:
        str: The YAML text produced by ``yaml.dump``.
    """
    def as_hash_lines(text: str) -> str:
        # Prefix every line of the comment with "# ".
        prefixed = []
        for line in text.split('\n'):
            prefixed.append('# ' + line.strip())
        return '\n'.join(prefixed)

    commented_sections = ('structs', 'interfaces', 'functions')

    result = {}
    for section, payload in go_data.items():
        if section not in commented_sections:
            result[section] = payload
            continue

        entries = []
        for item in payload:
            entry = dict(item)
            entry['comment'] = (
                as_hash_lines(item['comment']) if item.get('comment') else None
            )
            entries.append(entry)
        result[section] = entries

    return yaml.dump(result, sort_keys=False, default_flow_style=False)
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Convert every ``.go`` file below source_dir into a YAML file.

    The source tree is walked recursively.  For each Go file the matching
    sub-directory is created under dest_dir and the extracted data is
    written there as ``<file name without extension>.yaml``.

    Args:
        source_dir (str): Root directory that is searched for Go files.
        dest_dir (str): Root directory that receives the YAML files.
    """
    for current_dir, _subdirs, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.go'):
                continue

            input_path = os.path.join(current_dir, filename)
            # Mirror the file's position inside the source tree.
            sub_dir = os.path.dirname(os.path.relpath(input_path, source_dir))
            output_dir = os.path.join(dest_dir, sub_dir)
            os.makedirs(output_dir, exist_ok=True)

            rendered = convert_to_yaml(parse_go_file(input_path))

            stem = os.path.splitext(filename)[0]
            output_path = os.path.join(output_dir, f"{stem}.yaml")
            with open(output_path, 'w') as out:
                out.write(rendered)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point.  NOTE(review): both paths look like placeholders
    # that must be edited before the script is run.
    source_directory = "/path/to/go/source/directory"
    destination_directory = "/path/to/yaml/destination/directory"

    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
171
api/extract_java_to_yaml.py
Normal file
171
api/extract_java_to_yaml.py
Normal file
|
@ -0,0 +1,171 @@
|
|||
"""
|
||||
This script extracts information from Java files and converts it to YAML format.
|
||||
It processes Java files in a given source directory, extracts various components
|
||||
such as package, imports, class info, fields, methods, and interfaces, and then
|
||||
writes the extracted information to YAML files in a specified destination directory.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_java_file(file_path: str) -> Dict[str, Any]:
    """
    Read one Java source file and gather everything the extractors find.

    Args:
        file_path (str): Path to the Java file.

    Returns:
        Dict[str, Any]: Mapping with the following keys, in this order:
            - name: Base file name up to (not including) its first '.'
            - package: Result of extract_package
            - imports: Result of extract_imports
            - class_info: Result of extract_class_info
            - class_comment: Result of extract_class_comment
            - fields: Result of extract_fields
            - methods: Result of extract_methods
            - interfaces: Result of extract_interfaces
    """
    with open(file_path, 'r') as source:
        text = source.read()

    base_name = os.path.basename(file_path)

    # Dict literals evaluate top to bottom, so the extractors run in the
    # same order as the keys are listed.
    return {
        "name": base_name.split('.')[0],
        "package": extract_package(text),
        "imports": extract_imports(text),
        "class_info": extract_class_info(text),
        "class_comment": extract_class_comment(text),
        "fields": extract_fields(text),
        "methods": extract_methods(text),
        "interfaces": extract_interfaces(text)
    }
|
||||
|
||||
def extract_package(content: str) -> str:
    """
    Return the name declared by the first ``package <name>;`` statement.

    Args:
        content (str): Full text of a Java source file.

    Returns:
        str: The dotted package name, or "" when no declaration is found.
    """
    found = re.search(r'package\s+([\w.]+);', content)
    if found is None:
        return ""
    return found.group(1)
|
||||
|
||||
def extract_imports(content: str) -> List[str]:
    """
    Extract the names imported by a Java source file.

    Regular, wildcard (``import java.util.*;``) and static
    (``import static a.B.c;``) imports are all recognised.  The ``static``
    keyword itself is not part of the returned name.

    Args:
        content (str): Full text of a Java source file.

    Returns:
        List[str]: The imported names, in order of appearance.
    """
    import_pattern = r'import\s+(?:static\s+)?([\w.]+(?:\.\*)?);'
    return re.findall(import_pattern, content)
|
||||
|
||||
def extract_class_info(content: str) -> Dict[str, Any]:
    """
    Extract the declaration of the first class found in Java content.

    Args:
        content (str): Full text of a Java source file.

    Returns:
        Dict[str, Any]: Empty when no class declaration is found, otherwise
        a dict with the keys:
            - name: Class name
            - modifiers: Those of ``public``, ``abstract`` and ``final``
              that are present, without surrounding whitespace
            - extends: Name of the superclass, or ``None``
            - implements: All names listed after ``implements`` (empty
              list when there is no such clause)
    """
    class_pattern = (
        r'(public\s+)?(abstract\s+)?(final\s+)?class\s+(\w+)'
        r'(\s+extends\s+\w+)?(\s+implements\s+[\w,\s]+)?'
    )
    match = re.search(class_pattern, content)
    if not match:
        return {}

    extends_clause = match.group(5)
    implements_clause = match.group(6)

    implemented = []
    if implements_clause:
        # Drop the keyword, then split the remainder on commas.  Taking only
        # the last whitespace-separated token would lose every interface but
        # the final one in "implements A, B".
        names = implements_clause.strip()[len('implements'):]
        implemented = [name.strip() for name in names.split(',') if name.strip()]

    return {
        "name": match.group(4),
        "modifiers": [mod.strip() for mod in match.group(1, 2, 3) if mod],
        "extends": extends_clause.split()[-1] if extends_clause else None,
        "implements": implemented
    }
|
||||
|
||||
def extract_class_comment(content: str) -> str:
    """
    Extract the ``/** ... */`` comment that directly precedes the class.

    Only a comment block that is immediately followed (apart from
    whitespace) by optional ``public``/``abstract``/``final`` modifiers and
    the ``class`` keyword is returned.

    Args:
        content (str): Full text of a Java source file.

    Returns:
        str: The comment text between ``/**`` and ``*/``, stripped of
        surrounding whitespace, or an empty string when no such comment exists.
    """
    # The comment body is matched one character at a time, refusing to step
    # over a "*/".  A plain lazy ".*?" under DOTALL would start at the first
    # "/**" in the file and run across unrelated comments and code until it
    # reached the class.
    class_comment_pattern = (
        r'/\*\*((?:(?!\*/).)*)\*/\s*'
        r'(?:public\s+)?(?:abstract\s+)?(?:final\s+)?class'
    )
    match = re.search(class_comment_pattern, content, re.DOTALL)
    return match.group(1).strip() if match else ""
|
||||
|
||||
def extract_fields(content: str) -> List[Dict[str, Any]]:
    """
    Extract field declarations and their doc comments from Java content.

    A field is recognised as
    ``<visibility> [static] [final] <type> <name> [= value];``.  A
    ``/** ... */`` block is attached as its comment only when it sits
    directly in front of the field.

    Args:
        content (str): Full text of a Java source file.

    Returns:
        List[Dict[str, Any]]: One dict per field with the keys ``name``,
        ``type``, ``visibility`` and ``comment`` (``None`` when absent).
    """
    # The comment body may not step over "*/"; a lazy ".*?" under DOTALL would
    # glue an earlier comment (for example the class comment) and all the code
    # in between onto the first documented field.
    doc_comment = r'/\*\*((?:(?!\*/).)*)\*/'
    field_pattern = (
        r'(?:' + doc_comment + r'\s*)?'
        r'(public|protected|private)\s+(?:static\s+)?(?:final\s+)?'
        r'(\w+)\s+(\w+)(?:\s*=\s*[^;]+)?;'
    )
    return [
        {
            "name": name,
            "type": field_type,
            "visibility": visibility,
            "comment": comment.strip() if comment else None
        }
        for comment, visibility, field_type, name
        in re.findall(field_pattern, content, re.DOTALL)
    ]
|
||||
|
||||
def extract_methods(content: str) -> List[Dict[str, Any]]:
    """
    Extract method signatures and their doc comments from Java content.

    A method is recognised as
    ``<visibility> [static] [<word>] <return type> <name>(<parameters>)``.
    A ``/** ... */`` block is attached as its comment only when it sits
    directly in front of the method.

    Args:
        content (str): Full text of a Java source file.

    Returns:
        List[Dict[str, Any]]: One dict per method with the keys ``name``,
        ``return_type``, ``visibility``, ``parameters`` (as produced by
        ``parse_parameters``) and ``comment`` (``None`` when absent).
    """
    # The comment body may not step over "*/"; a lazy ".*?" under DOTALL would
    # let the capture begin at an earlier, unrelated comment.
    doc_comment = r'/\*\*((?:(?!\*/).)*)\*/'
    method_pattern = (
        r'(?:' + doc_comment + r'\s*)?'
        r'(public|protected|private)\s+(?:static\s+)?(?:\w+\s+)?'
        r'(\w+)\s+(\w+)\s*\((.*?)\)'
    )
    parsed_methods = []
    for comment, visibility, return_type, name, params in re.findall(
            method_pattern, content, re.DOTALL):
        parsed_methods.append({
            "name": name,
            "return_type": return_type,
            "visibility": visibility,
            "parameters": parse_parameters(params),
            "comment": comment.strip() if comment else None
        })
    return parsed_methods
|
||||
|
||||
def parse_parameters(params_str: str) -> List[Dict[str, str]]:
    """
    Parse the text between a Java method's parentheses into parameters.

    The last whitespace-separated token of each parameter is taken as its
    name and everything before it as its type, so ``final`` and annotations
    stay part of the type.  A parameter consisting of a single token is
    reported with that token as the type and an empty name instead of
    raising ``IndexError``.

    Args:
        params_str (str): Raw parameter list, e.g. ``"int a, String b"``.

    Returns:
        List[Dict[str, str]]: One ``{"type": ..., "name": ...}`` dict per
        parameter, in declaration order; empty for an empty list.
    """
    # Split on commas that are not nested inside <>, () or [] so that generic
    # types such as Map<String, Integer> stay in one piece.
    pieces = []
    current = []
    depth = 0
    for char in params_str:
        if char in '<([':
            depth += 1
        elif char in '>)]':
            depth = max(depth - 1, 0)
        if char == ',' and depth == 0:
            pieces.append(''.join(current))
            current = []
        else:
            current.append(char)
    pieces.append(''.join(current))

    parsed_params = []
    for piece in pieces:
        tokens = piece.split()
        if not tokens:
            continue
        if len(tokens) == 1:
            parsed_params.append({"type": tokens[0], "name": ""})
        else:
            parsed_params.append(
                {"type": ' '.join(tokens[:-1]), "name": tokens[-1]}
            )
    return parsed_params
|
||||
|
||||
def extract_interfaces(content: str) -> List[str]:
    """
    List the names that follow the first ``implements`` keyword.

    Args:
        content (str): Full text of a Java source file.

    Returns:
        List[str]: The comma-separated names after ``implements``, each
        stripped of whitespace; empty when the keyword does not occur.
    """
    found = re.search(r'implements\s+([\w,\s]+)', content)
    if not found:
        return []

    names = []
    for piece in found.group(1).split(','):
        names.append(piece.strip())
    return names
|
||||
|
||||
def convert_to_yaml(java_data: Dict[str, Any]) -> str:
    """
    Render the extracted Java data as a YAML document.

    Comment texts (the class comment and the ``comment`` entry of every
    field and method) are rewritten so that each of their lines starts with
    ``# ``; empty or missing comments become ``None``.  All other entries
    are passed through unchanged and key order is preserved.

    Args:
        java_data (Dict[str, Any]): Data as returned by parse_java_file.

    Returns:
        str: The YAML text produced by ``yaml.dump``.
    """
    def as_hash_lines(text: str) -> str:
        # Prefix every line of the comment with "# ".
        prefixed = []
        for line in text.split('\n'):
            prefixed.append('# ' + line.strip())
        return '\n'.join(prefixed)

    member_sections = ('fields', 'methods')

    result = {}
    for section, payload in java_data.items():
        if section == 'class_comment':
            result['class_comment'] = as_hash_lines(payload) if payload else None
        elif section in member_sections:
            entries = []
            for item in payload:
                entry = dict(item)
                entry['comment'] = (
                    as_hash_lines(item['comment'])
                    if item.get('comment') else None
                )
                entries.append(entry)
            result[section] = entries
        else:
            result[section] = payload

    return yaml.dump(result, sort_keys=False, default_flow_style=False)
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Convert every ``.java`` file below source_dir into a YAML file.

    The source tree is walked recursively.  For each Java file the matching
    sub-directory is created under dest_dir and the extracted data is
    written there as ``<file name without extension>.yaml``.

    Args:
        source_dir (str): Root directory that is searched for Java files.
        dest_dir (str): Root directory that receives the YAML files.
    """
    for current_dir, _subdirs, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.java'):
                continue

            input_path = os.path.join(current_dir, filename)
            # Mirror the file's position inside the source tree.
            sub_dir = os.path.dirname(os.path.relpath(input_path, source_dir))
            output_dir = os.path.join(dest_dir, sub_dir)
            os.makedirs(output_dir, exist_ok=True)

            rendered = convert_to_yaml(parse_java_file(input_path))

            stem = os.path.splitext(filename)[0]
            output_path = os.path.join(output_dir, f"{stem}.yaml")
            with open(output_path, 'w') as out:
                out.write(rendered)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point.  NOTE(review): both paths look like placeholders
    # that must be edited before the script is run.
    source_directory = "/path/to/java/source/directory"
    destination_directory = "/path/to/yaml/destination/directory"

    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
149
api/extract_javascript_to_yaml.py
Normal file
149
api/extract_javascript_to_yaml.py
Normal file
|
@ -0,0 +1,149 @@
|
|||
"""
|
||||
This script extracts information from JavaScript files and converts it to YAML format.
|
||||
It processes JavaScript files in a given source directory, extracts various components
|
||||
such as imports, classes, properties, and methods, and then writes the extracted
|
||||
information to YAML files in a specified destination directory.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_javascript_file(file_path: str) -> Dict[str, Any]:
    """
    Read one JavaScript source file and gather what the extractors find.

    Args:
        file_path (str): Path to the JavaScript file.

    Returns:
        Dict[str, Any]: Mapping with the following keys, in this order:
            - name: Base file name up to (not including) its first '.'
            - imports: Result of extract_imports
            - class_comment: First item returned by extract_class_info
            - class_name: Second item returned by extract_class_info
            - properties: Result of extract_properties
            - methods: Result of extract_methods
    """
    with open(file_path, 'r') as source:
        text = source.read()

    base_name = os.path.basename(file_path)
    found_imports = extract_imports(text)
    comment, cls_name = extract_class_info(text)

    return {
        "name": base_name.split('.')[0],
        "imports": found_imports,
        "class_comment": comment,
        "class_name": cls_name,
        "properties": extract_properties(text),
        "methods": extract_methods(text)
    }
|
||||
|
||||
def extract_imports(content: str) -> List[str]:
    """
    List the ``import ... from '...';`` statements of a JavaScript file.

    Only statements that fit on one line and end with a semicolon are
    matched.

    Args:
        content (str): Full text of a JavaScript source file.

    Returns:
        List[str]: The complete text of each matching statement, in order
        of appearance.
    """
    import_regex = re.compile(r'import\s+.*?from\s+[\'"].*?[\'"];')
    return import_regex.findall(content)
|
||||
|
||||
def extract_class_info(content: str) -> tuple:
    """
    Extract the name of the first class and the comment directly above it.

    A ``/** ... */`` block counts as the class comment only when nothing
    but whitespace separates it from the ``class`` keyword.

    Args:
        content (str): Full text of a JavaScript source file.

    Returns:
        tuple: ``(comment, name)``.  ``comment`` is the stripped comment
        text or "" when there is none; both items are "" when no class is
        found.
    """
    # The comment body is matched one character at a time, refusing to step
    # over a "*/".  A plain lazy ".*?" under DOTALL would start at the first
    # "/**" in the file and run across unrelated comments and code until it
    # reached the class.
    class_pattern = r'(/\*\*((?:(?!\*/).)*)\*/\s*)?class\s+(\w+)'
    match = re.search(class_pattern, content, re.DOTALL)
    if not match:
        return "", ""
    comment = match.group(2).strip() if match.group(2) else ""
    return comment, match.group(3)
|
||||
|
||||
def extract_properties(content: str) -> List[Dict[str, Any]]:
    """
    Extract ``[static] name = ...`` assignments and their doc comments.

    Every identifier that is followed by a single ``=`` is reported.
    Comparisons (``==``, ``===``) and arrow functions (``=>``) are not
    treated as assignments.  A ``/** ... */`` block is attached as the
    comment only when it sits directly in front of the assignment.

    Args:
        content (str): Full text of a JavaScript source file.

    Returns:
        List[Dict[str, Any]]: One dict per assignment with the keys
        ``name``, ``static`` (bool) and ``comment`` (``None`` when absent).
    """
    # - The comment body may not step over "*/"; a lazy ".*?" under DOTALL
    #   would glue an earlier comment and the code after it onto the first
    #   documented property.
    # - "(?![=>])" rejects "==", "===" and "=>".
    property_pattern = (
        r'(/\*\*((?:(?!\*/).)*)\*/\s*)?(static\s+)?(\w+)\s*=(?![=>])\s*'
    )
    properties = re.findall(property_pattern, content, re.DOTALL)
    return [
        {
            "name": name,
            "static": bool(static_kw),
            "comment": comment.strip() if comment else None
        }
        for _whole_comment, comment, static_kw, name in properties
    ]
|
||||
|
||||
def extract_methods(content: str) -> List[Dict[str, Any]]:
    """
    Extract ``[static] name(params) {`` definitions and their doc comments.

    Blocks introduced by a control-flow keyword (``if``, ``for``, ``while``,
    ``switch``, ``catch``, ``with``) and anonymous ``function (...) {``
    expressions are not reported as methods.  A ``/** ... */`` block is
    attached as the comment only when it sits directly in front of the
    definition.

    Args:
        content (str): Full text of a JavaScript source file.

    Returns:
        List[Dict[str, Any]]: One dict per method with the keys ``name``,
        ``static`` (bool), ``parameters`` (as produced by
        ``parse_parameters``) and ``comment`` (``None`` when absent).
    """
    # - The comment body may not step over "*/" (see the lookahead), so an
    #   earlier comment cannot be glued onto a later method.
    # - The parameter list may not contain ")".  A lazy ".*?" under DOTALL
    #   would let a plain call such as "log(x);" absorb the text up to the
    #   next ") {".
    method_pattern = (
        r'(/\*\*((?:(?!\*/).)*)\*/\s*)?(static\s+)?(\w+)\s*\(([^)]*)\)\s*\{'
    )
    not_method_names = {
        "if", "for", "while", "switch", "catch", "with", "function",
    }

    parsed_methods = []
    for _whole_comment, comment, static_kw, name, params in re.findall(
            method_pattern, content, re.DOTALL):
        if name in not_method_names:
            continue
        parsed_methods.append({
            "name": name,
            "static": bool(static_kw),
            "parameters": parse_parameters(params),
            "comment": comment.strip() if comment else None
        })
    return parsed_methods
|
||||
|
||||
def parse_parameters(params_str: str) -> List[str]:
    """
    Split a JavaScript parameter list on commas.

    Args:
        params_str (str): Text between a method's parentheses.

    Returns:
        List[str]: The comma-separated pieces, stripped of surrounding
        whitespace, with empty pieces left out.
    """
    cleaned = []
    for piece in params_str.split(','):
        trimmed = piece.strip()
        if trimmed:
            cleaned.append(trimmed)
    return cleaned
|
||||
|
||||
def convert_to_yaml(js_data: Dict[str, Any]) -> str:
    """
    Render the extracted JavaScript data as a YAML document.

    Comment texts (the class comment and the ``comment`` entry of every
    property and method) are rewritten so that each of their lines starts
    with ``# ``; empty or missing comments become ``None``.  All other
    entries are passed through unchanged and key order is preserved.

    Args:
        js_data (Dict[str, Any]): Data as returned by parse_javascript_file.

    Returns:
        str: The YAML text produced by ``yaml.dump``.
    """
    def as_hash_lines(text: str) -> str:
        # Prefix every line of the comment with "# ".
        prefixed = []
        for line in text.split('\n'):
            prefixed.append('# ' + line.strip())
        return '\n'.join(prefixed)

    result = {}
    for field, payload in js_data.items():
        if field == 'class_comment':
            result['class_comment'] = as_hash_lines(payload) if payload else None
        elif field == 'properties':
            entries = []
            for prop in payload:
                entry = dict(prop)
                entry['comment'] = (
                    as_hash_lines(prop['comment']) if prop['comment'] else None
                )
                entries.append(entry)
            result['properties'] = entries
        elif field == 'methods':
            entries = []
            for method in payload:
                entry = dict(method)
                entry['comment'] = (
                    as_hash_lines(method['comment'])
                    if method.get('comment') else None
                )
                entries.append(entry)
            result['methods'] = entries
        else:
            result[field] = payload

    return yaml.dump(result, sort_keys=False, default_flow_style=False)
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Walk source_dir recursively and write one YAML file per JavaScript file.

    The directory layout below source_dir is mirrored below dest_dir; each
    `<name>.js` file produces `<name>.yaml` in the matching directory.
    """
    for current_dir, _subdirs, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.js'):
                continue

            js_path = os.path.join(current_dir, filename)
            # Mirror the file's position relative to source_dir under dest_dir.
            mirrored_dir = os.path.join(
                dest_dir,
                os.path.dirname(os.path.relpath(js_path, source_dir)),
            )
            os.makedirs(mirrored_dir, exist_ok=True)

            yaml_content = convert_to_yaml(parse_javascript_file(js_path))

            stem = os.path.splitext(filename)[0]
            with open(os.path.join(mirrored_dir, f"{stem}.yaml"), 'w') as output:
                output.write(yaml_content)
|
||||
|
||||
if __name__ == "__main__":
    # Placeholder locations; edit them before running the script.
    source_directory, destination_directory = (
        "/path/to/javascript/source/directory",
        "/path/to/yaml/destination/directory",
    )
    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
253
api/extract_php_to_yaml.py
Normal file
253
api/extract_php_to_yaml.py
Normal file
|
@ -0,0 +1,253 @@
|
|||
"""
|
||||
This script extracts information from PHP files and converts it to YAML format.
|
||||
It processes PHP files in a given source directory, extracts various components
|
||||
such as dependencies, properties, methods, traits, and interfaces, and then
|
||||
writes the extracted information to YAML files in a specified destination directory.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_php_file(file_path: str) -> Dict[str, Any]:
    """
    Read a PHP file and collect everything the extractors find in it.

    Args:
        file_path (str): Path to the PHP file.

    Returns:
        Dict[str, Any]: Extracted information, in this key order:
            - name: File name up to its first dot
            - class_comment: Comment for the class (if any)
            - dependencies: List of dependencies (use statements)
            - properties: List of class properties
            - methods: List of class methods
            - traits: List of traits used
            - interfaces: List of interfaces implemented
    """
    with open(file_path, 'r') as source:
        php_source = source.read()

    # The dict literal evaluates the extractors top to bottom.
    return {
        "name": os.path.basename(file_path).split('.')[0],
        "class_comment": extract_class_comment(php_source),
        "dependencies": extract_dependencies(php_source),
        "properties": extract_properties(php_source),
        "methods": extract_methods(php_source),
        "traits": extract_traits(php_source),
        "interfaces": extract_interfaces(php_source)
    }
|
||||
|
||||
def extract_class_comment(content: str) -> str:
    """
    Extract the class-level comment from PHP content.

    Args:
        content (str): PHP file content.

    Returns:
        str: Text of the docblock that sits directly above the class
        declaration (without the `/**` and `*/` markers), or an empty string
        if there is none.
    """
    # The docblock body uses a tempered dot so it cannot extend past its own
    # "*/". Otherwise an earlier docblock (e.g. a file header) would swallow
    # everything up to the class. `abstract`/`final`/`readonly` may sit
    # between the docblock and the `class` keyword.
    class_comment_pattern = (
        r'/\*\*((?:(?!\*/).)*)\*/\s*'
        r'(?:(?:abstract|final|readonly)\s+)*class\b'
    )
    match = re.search(class_comment_pattern, content, re.DOTALL)
    if match:
        return match.group(1).strip()
    return ""
|
||||
|
||||
def extract_dependencies(content: str) -> List[Dict[str, str]]:
    """
    Extract dependencies (namespace `use` statements) from PHP content.

    Only the part of the file that precedes the first class, interface, trait
    or enum declaration is scanned, so trait `use` statements inside a class
    body are not reported as imports.

    Args:
        content (str): PHP file content.

    Returns:
        List[Dict[str, str]]: List of dictionaries containing dependency information:
            - name: Alias or class name
            - type: Always "class" for now (might need refinement)
            - source: Full namespace of the dependency
    """
    declaration = re.search(
        r'^[ \t]*(?:(?:abstract|final|readonly)\s+)*'
        r'(?:class|interface|trait|enum)\s+\w+',
        content,
        re.MULTILINE,
    )
    header = content[:declaration.start()] if declaration else content

    # Captures the full namespace and the optional alias.
    use_statements = re.findall(r'use\s+([\w\\]+)(?:\s+as\s+(\w+))?;', header)
    return [
        {
            "name": alias if alias else namespace.split('\\')[-1],
            "type": "class",  # Assuming class for now, might need refinement
            "source": namespace
        }
        for namespace, alias in use_statements
    ]
|
||||
|
||||
def extract_properties(content: str) -> List[Dict[str, Any]]:
    """
    Extract class properties and their comments from PHP content.

    Args:
        content (str): PHP file content.

    Returns:
        List[Dict[str, Any]]: List of dictionaries containing property information:
            - name: Property name (without $)
            - visibility: public, protected, or private
            - comment: Text of the docblock directly above the property (if any)
    """
    property_pattern = (
        # Optional docblock; the tempered dot keeps it from running past its
        # own "*/" into the docblock of a later member.
        r'(?:/\*\*((?:(?!\*/).)*)\*/\s*)?'
        r'(public|protected|private)\s+'
        r'(?:(?:static|readonly)\s+)*'
        # Optional type declaration (`string`, `?Foo`, `int|null`, `\App\Bar`).
        r'(?:[?\w\\|]+\s+)?'
        r'(\$\w+)(?:\s*=\s*[^;]+)?;'
    )
    properties = re.findall(property_pattern, content, re.DOTALL)
    return [
        {
            "name": prop[2][1:],
            "visibility": prop[1],
            "comment": prop[0].strip() if prop[0] else None
        } for prop in properties
    ]
|
||||
|
||||
def extract_methods(content: str) -> List[Dict[str, Any]]:
    """
    Extract class methods and their comments from PHP content.

    Args:
        content (str): PHP file content.

    Returns:
        List[Dict[str, Any]]: List of dictionaries containing method information:
            - name: Method name
            - visibility: public, protected, or private
            - parameters: List of parameter dictionaries
            - comment: Text of the docblock directly above the method (if any)
    """
    method_pattern = (
        # Optional docblock; the tempered dot stops it at its own "*/" so an
        # earlier member's docblock cannot bleed into this method's comment.
        r'(?:/\*\*((?:(?!\*/).)*)\*/\s*)?'
        # `abstract` / `final` / `static` may appear on either side of the
        # visibility keyword.
        r'(?:(?:abstract|final|static)\s+)*'
        r'(public|protected|private)\s+'
        r'(?:(?:abstract|final|static)\s+)*'
        r'function\s+&?\s*(\w+)\s*\((.*?)\)'
    )
    methods = re.findall(method_pattern, content, re.DOTALL)
    return [
        {
            "name": method[2],
            "visibility": method[1],
            "parameters": parse_parameters(method[3]),
            "comment": method[0].strip() if method[0] else None
        }
        for method in methods
    ]
|
||||
|
||||
def parse_parameters(params_str: str) -> List[Dict[str, str]]:
    """
    Parse method parameters from a parameter string.

    Commas nested inside brackets or quoted strings (for example a default of
    `[1, 2]` or `'a,b'`) do not split a parameter, and only the first `=`
    separates the declaration from its default value.

    Args:
        params_str (str): String containing method parameters.

    Returns:
        List[Dict[str, str]]: List of dictionaries containing parameter information:
            - name: Parameter name (without `$`, `&` or `...`)
            - default: Default value (only present if specified)
    """
    # Split on top-level commas only.
    chunks = []
    current = []
    depth = 0
    quote = None
    escaped = False
    for char in params_str:
        if quote:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                quote = None
        elif char in ('"', "'"):
            quote = char
        elif char in '([{':
            depth += 1
        elif char in ')]}':
            depth = max(depth - 1, 0)
        elif char == ',' and depth == 0:
            chunks.append(''.join(current))
            current = []
            continue
        current.append(char)
    chunks.append(''.join(current))

    parsed_params = []
    for chunk in chunks:
        declaration, has_default, default = chunk.strip().partition('=')
        tokens = declaration.split()
        if not tokens:
            continue
        # The variable is the last token; drop by-reference / variadic markers.
        param_dict = {"name": tokens[-1].lstrip('&.').strip('$')}
        if has_default:
            param_dict["default"] = default.strip()
        parsed_params.append(param_dict)
    return parsed_params
|
||||
|
||||
def extract_traits(content: str) -> List[str]:
    """
    Extract traits used in the PHP content.

    Only `use` statements that appear after the opening brace of the first
    class, trait or enum declaration are considered, so namespace imports at
    the top of the file are not mistaken for traits. Every name of a
    comma-separated `use A, B;` list is returned.

    Args:
        content (str): PHP file content.

    Returns:
        List[str]: List of trait names used in the class.
    """
    class_start = re.search(
        r'^[ \t]*(?:(?:abstract|final|readonly)\s+)*'
        r'(?:class|trait|enum)\s+\w+[^{]*\{',
        content,
        re.MULTILINE,
    )
    if not class_start:
        return []

    body = content[class_start.end():]
    traits = []
    # A trait list ends with ";" or with "{" when it has a conflict-resolution
    # block. Closure `use ($var)` clauses never match because of the "(".
    for match in re.finditer(r'\buse\s+([\w\\]+(?:\s*,\s*[\w\\]+)*)\s*[;{]', body):
        traits.extend(name.strip() for name in match.group(1).split(','))
    return traits
|
||||
|
||||
def extract_interfaces(content: str) -> List[str]:
    """
    Extract interfaces implemented by the class in the PHP content.

    Args:
        content (str): PHP file content.

    Returns:
        List[str]: Every interface name listed after `implements`, in source
        order (all names of a comma-separated list, not just the first).
    """
    interfaces = []
    # Capture the whole comma-separated list and split it afterwards; a
    # repeated group inside the pattern would only keep one name.
    for match in re.finditer(r'\bimplements\s+([\w\\]+(?:\s*,\s*[\w\\]+)*)', content):
        interfaces.extend(name.strip() for name in match.group(1).split(','))
    return interfaces
|
||||
|
||||
def convert_to_yaml(php_data: Dict[str, Any]) -> str:
    """
    Serialise the extracted PHP data as a YAML document.

    Args:
        php_data (Dict[str, Any]): Dictionary containing extracted PHP data.

    Returns:
        str: YAML text. Comment strings (the class comment and the `comment`
        entry of each property and method) have every line prefixed with
        "# "; other keys are emitted unchanged, in their original order.
    """
    def prefix_lines(text: str) -> str:
        return '\n'.join(['# ' + row.strip() for row in text.split('\n')])

    def recomment(entry, raw_comment):
        # Work on a copy so the input dictionaries are not modified.
        clone = dict(entry)
        clone['comment'] = prefix_lines(raw_comment) if raw_comment else None
        return clone

    output = {}
    for section, payload in php_data.items():
        if section == 'class_comment':
            output['class_comment'] = prefix_lines(payload) if payload else None
        elif section == 'properties':
            output['properties'] = [recomment(item, item['comment']) for item in payload]
        elif section == 'methods':
            output['methods'] = [recomment(item, item.get('comment')) for item in payload]
        else:
            output[section] = payload

    return yaml.dump(output, sort_keys=False, default_flow_style=False)
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Convert every PHP file under source_dir into a YAML file under dest_dir.

    Args:
        source_dir (str): Path to the source directory containing PHP files.
        dest_dir (str): Path to the destination directory for YAML files.
            Sub-directories are created as needed to mirror source_dir.
    """
    for folder, _subfolders, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.php'):
                continue

            php_path = os.path.join(folder, filename)
            # Place the output at the same relative location as the input.
            target_dir = os.path.join(
                dest_dir,
                os.path.dirname(os.path.relpath(php_path, source_dir)),
            )
            os.makedirs(target_dir, exist_ok=True)

            yaml_content = convert_to_yaml(parse_php_file(php_path))

            stem = os.path.splitext(filename)[0]
            with open(os.path.join(target_dir, f"{stem}.yaml"), 'w') as output:
                output.write(yaml_content)
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Usage: extract_php_to_yaml.py [source_dir [dest_dir]]
    # The defaults are the locations that used to be hard-coded here, so
    # running the script without arguments behaves as before.
    source_directory = (
        sys.argv[1] if len(sys.argv) > 1
        else "/home/platform/Devboxes/resources/laravel_framework/src/Illuminate/"
    )
    destination_directory = (
        sys.argv[2] if len(sys.argv) > 2
        else "/home/platform/Devboxes/platform/api/"
    )

    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
165
api/extract_python_to_yaml.py
Normal file
165
api/extract_python_to_yaml.py
Normal file
|
@ -0,0 +1,165 @@
|
|||
"""
|
||||
This script extracts information from Python files and converts it to YAML format.
|
||||
It processes Python files in a given source directory, extracts various components
|
||||
such as imports, classes, methods, and properties, and then writes the extracted
|
||||
information to YAML files in a specified destination directory.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import ast
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_python_file(file_path: str) -> Dict[str, Any]:
    """
    Parse a Python file and extract its components.

    Args:
        file_path (str): Path to the Python file.

    Returns:
        Dict[str, Any]: A dictionary containing extracted information:
            - name: File name up to its first dot
            - imports: List of import statements
            - classes: List of class information

    Raises:
        SyntaxError: If the file is not valid Python for the running interpreter.
    """
    # Read raw bytes and let the parser decode them: that honours a PEP 263
    # encoding declaration or a UTF-8 BOM, instead of depending on the locale's
    # default text encoding.
    with open(file_path, 'rb') as file:
        content = file.read()

    tree = ast.parse(content, filename=file_path)
    name = os.path.basename(file_path).split('.')[0]
    imports = extract_imports(tree)
    classes = extract_classes(tree)

    return {
        "name": name,
        "imports": imports,
        "classes": classes
    }
|
||||
|
||||
def extract_imports(tree: ast.AST) -> List[Dict[str, str]]:
    """
    Extract import statements from Python AST.

    Args:
        tree (ast.AST): Python abstract syntax tree.

    Returns:
        List[Dict[str, str]]: List of dictionaries containing import information:
            - name: Imported name
            - source: Dotted path of the imported object. Relative imports
              keep their leading dots (`from ..pkg import y` -> "..pkg.y",
              `from . import x` -> ".x").
    """
    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports.append({"name": alias.name, "source": alias.name})
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for `from . import x`; node.level counts the
            # leading dots of a relative import.
            prefix = "." * node.level + (node.module or "")
            separator = "" if not prefix or prefix.endswith(".") else "."
            for alias in node.names:
                imports.append({"name": alias.name, "source": f"{prefix}{separator}{alias.name}"})
    return imports
|
||||
|
||||
def extract_classes(tree: ast.AST) -> List[Dict[str, Any]]:
    """
    Extract class information from Python AST.

    Args:
        tree (ast.AST): Python abstract syntax tree.

    Returns:
        List[Dict[str, Any]]: One dictionary per class found anywhere in the
        tree (nested classes included):
            - name: Class name
            - comment: Class docstring (if any)
            - bases: Source text of each base class expression
            - methods: List of method information
            - properties: List of class properties
    """
    classes = []
    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef):
            class_info = {
                "name": node.name,
                "comment": ast.get_docstring(node),
                # ast.unparse keeps dotted and subscripted bases such as
                # `models.Model` or `Generic[T]`; plain names are unchanged.
                "bases": [ast.unparse(base) for base in node.bases],
                "methods": extract_methods(node),
                "properties": extract_properties(node)
            }
            classes.append(class_info)
    return classes
|
||||
|
||||
def extract_methods(class_node: ast.ClassDef) -> List[Dict[str, Any]]:
    """
    Extract method information from a class node.

    Both `def` and `async def` methods defined directly in the class body are
    reported.

    Args:
        class_node (ast.ClassDef): Class definition node.

    Returns:
        List[Dict[str, Any]]: List of dictionaries containing method information:
            - name: Method name
            - comment: Method docstring (if any)
            - parameters: Names of the regular positional parameters,
              excluding `self`
    """
    methods = []
    for node in class_node.body:
        # Coroutine methods are a separate node type and would otherwise be
        # silently skipped.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            method_info = {
                "name": node.name,
                "comment": ast.get_docstring(node),
                "parameters": [arg.arg for arg in node.args.args if arg.arg != 'self']
            }
            methods.append(method_info)
    return methods
|
||||
|
||||
def extract_properties(class_node: ast.ClassDef) -> List[Dict[str, str]]:
    """
    Collect the annotated attributes declared directly in a class body.

    Args:
        class_node (ast.ClassDef): Class definition node.

    Returns:
        List[Dict[str, str]]: One dictionary per annotated assignment whose
        target is a simple name:
            - name: Attribute name
            - type: Source text of the annotation
    """
    def describe(statement: ast.AnnAssign) -> Dict[str, str]:
        annotation = statement.annotation
        return {
            "name": statement.target.id,
            "type": ast.unparse(annotation) if annotation else None
        }

    return [
        describe(statement)
        for statement in class_node.body
        if isinstance(statement, ast.AnnAssign) and isinstance(statement.target, ast.Name)
    ]
|
||||
|
||||
def convert_to_yaml(python_data: Dict[str, Any]) -> str:
    """
    Serialise the extracted Python data as a YAML document.

    Args:
        python_data (Dict[str, Any]): Dictionary containing extracted Python data.

    Returns:
        str: Block-style YAML text with keys kept in insertion order.
    """
    dump_options = {"sort_keys": False, "default_flow_style": False}
    return yaml.dump(python_data, **dump_options)
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Convert every Python file under source_dir into a YAML file under dest_dir.

    Args:
        source_dir (str): Path to the source directory containing Python files.
        dest_dir (str): Path to the destination directory for YAML files.
            Sub-directories are created as needed to mirror source_dir.
    """
    for folder, _subfolders, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.py'):
                continue

            py_path = os.path.join(folder, filename)
            # Place the output at the same relative location as the input.
            target_dir = os.path.join(
                dest_dir,
                os.path.dirname(os.path.relpath(py_path, source_dir)),
            )
            os.makedirs(target_dir, exist_ok=True)

            yaml_content = convert_to_yaml(parse_python_file(py_path))

            stem = os.path.splitext(filename)[0]
            with open(os.path.join(target_dir, f"{stem}.yaml"), 'w') as output:
                output.write(yaml_content)
|
||||
|
||||
if __name__ == "__main__":
    # Placeholder locations; edit them before running the script.
    source_directory, destination_directory = (
        "/path/to/python/source/directory",
        "/path/to/yaml/destination/directory",
    )
    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
215
api/extract_rust_to_yaml.py
Normal file
215
api/extract_rust_to_yaml.py
Normal file
|
@ -0,0 +1,215 @@
|
|||
"""
|
||||
This script extracts information from Rust files and converts it to YAML format.
|
||||
It processes Rust files in a given source directory, extracts various components
|
||||
such as dependencies, structs, impl blocks, traits, and functions, and then
|
||||
writes the extracted information to YAML files in a specified destination directory.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Dict, List, Any
|
||||
|
||||
def parse_rust_file(file_path: str) -> Dict[str, Any]:
    """
    Parse a Rust file and extract its components.

    Args:
        file_path (str): Path to the Rust file.

    Returns:
        Dict[str, Any]: A dictionary containing extracted information:
            - name: File name up to its first dot
            - module_comment: Comment for the module (if any)
            - dependencies: List of dependencies (use statements)
            - structs: List of struct definitions
            - impls: List of impl blocks
            - traits: List of trait definitions
            - functions: List of standalone functions
    """
    # Rust source files are UTF-8; decode explicitly rather than relying on
    # the platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    name = os.path.basename(file_path).split('.')[0]
    module_comment = extract_module_comment(content)
    dependencies = extract_dependencies(content)
    structs = extract_structs(content)
    impls = extract_impls(content)
    traits = extract_traits(content)
    functions = extract_functions(content)

    return {
        "name": name,
        "module_comment": module_comment,
        "dependencies": dependencies,
        "structs": structs,
        "impls": impls,
        "traits": traits,
        "functions": functions
    }
|
||||
|
||||
def extract_module_comment(content: str) -> str:
    """
    Extract the module-level comment from Rust content.

    The first run of consecutive lines that start with `//!` at column 0 is
    returned with the `//!` markers removed, one comment line per output line.

    Args:
        content (str): Rust file content.

    Returns:
        str: The module comment, or an empty string if there is none.
    """
    # "." does not match newlines here, so each alternative consumes exactly
    # one comment line and the run ends at the first non-`//!` line.
    match = re.search(r'^//!.*(?:\n//!.*)*', content, re.MULTILINE)
    if not match:
        return ""
    lines = [line[3:].strip() for line in match.group(0).split('\n')]
    return '\n'.join(lines).strip()
|
||||
|
||||
def extract_dependencies(content: str) -> List[str]:
    """
    List the paths named by `use` statements in Rust content.

    For a grouped import such as `use a::b::{C, D};` only the common prefix
    (`a::b`) is reported.

    Args:
        content (str): Rust file content.

    Returns:
        List[str]: One path per matched `use` statement, in source order.
    """
    use_statement = re.compile(r'use\s+([\w:]+)(?:::\{.*?\})?;')
    paths = []
    for statement in use_statement.finditer(content):
        paths.append(statement.group(1))
    return paths
|
||||
|
||||
def extract_structs(content: str) -> List[Dict[str, Any]]:
    """
    Extract struct definitions from Rust content.

    Args:
        content (str): Rust file content.

    Returns:
        List[Dict[str, Any]]: One dictionary per `pub struct Name { ... }`:
            - name: Struct name
            - comment: Text of the `///` lines directly above the struct
              (markers removed), or None when it has no doc comment
            - fields: Field list as returned by extract_struct_fields
    """
    struct_pattern = re.compile(
        # Contiguous `///` lines only: a lazy DOTALL match would start at any
        # earlier doc comment in the file and swallow the code in between.
        r'((?:^[ \t]*///.*\n)*)'
        # Attributes such as #[derive(Debug)] may sit between docs and struct.
        r'(?:^[ \t]*#\[.*\][ \t\r]*\n)*'
        r'^[ \t]*pub struct (\w+)(?:<[^{;]*?>)?\s*\{([^}]*)\}',
        re.MULTILINE,
    )
    structs = []
    for doc, name, body in struct_pattern.findall(content):
        doc_lines = [line.strip()[3:].strip() for line in doc.splitlines()]
        comment = '\n'.join(doc_lines).strip()
        structs.append({
            "name": name,
            "comment": comment or None,
            "fields": extract_struct_fields(body)
        })
    return structs
|
||||
|
||||
def extract_struct_fields(fields_str: str) -> List[Dict[str, str]]:
    """
    Extract fields from a struct definition.

    Args:
        fields_str (str): Text between the braces of a struct definition,
            with one field per line.

    Returns:
        List[Dict[str, str]]: One dictionary per `pub` field:
            - name: Field name
            - type: Field type, without the trailing comma or a trailing
              `//` comment
    """
    field_pattern = r'pub (\w+):\s*(.+)'
    fields = []
    for name, raw_type in re.findall(field_pattern, fields_str):
        # The pattern captures the rest of the line; trim the line comment and
        # the field separator so only the type itself remains.
        type_text = raw_type.split('//', 1)[0].strip().rstrip(',').strip()
        fields.append({"name": name, "type": type_text})
    return fields
|
||||
|
||||
def extract_impls(content: str) -> List[Dict[str, Any]]:
    """
    Extract impl blocks from Rust content.

    Args:
        content (str): Rust file content.

    Returns:
        List[Dict[str, Any]]: One dictionary per `impl` block:
            - struct: Name of the type being implemented
            - trait: Name of the implemented trait for `impl Trait for Type`,
              otherwise None
            - methods: Method list as returned by extract_methods
    """
    def block_body(open_index: int) -> str:
        # Return the text between the "{" at open_index and its matching "}".
        # Braces are counted so nested method bodies stay inside the block;
        # braces inside string literals or comments are not special-cased.
        depth = 0
        for index in range(open_index, len(content)):
            char = content[index]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return content[open_index + 1:index]
        return content[open_index + 1:]

    header_pattern = re.compile(
        r'\bimpl(?:<.*?>)?\s+(\w+)\s*(?:for\s+(\w+))?\s*\{', re.DOTALL
    )
    impls = []
    for header in header_pattern.finditer(content):
        first, second = header.group(1), header.group(2)
        # In `impl Trait for Type` the first identifier is the trait and the
        # second one is the type.
        struct_name, trait_name = (second, first) if second else (first, None)
        impls.append({
            "struct": struct_name,
            "trait": trait_name,
            "methods": extract_methods(block_body(header.end() - 1))
        })
    return impls
|
||||
|
||||
def extract_methods(impl_content: str) -> List[Dict[str, Any]]:
    """
    Extract methods from an impl block.

    Args:
        impl_content (str): Text between the braces of an impl block.

    Returns:
        List[Dict[str, Any]]: One dictionary per `pub fn`:
            - name: Method name
            - comment: Text of the `///` lines directly above the method
              (markers removed), or None when it has no doc comment
            - parameters: Parameter list as returned by parse_parameters
            - return_type: Text after `->`, or None
    """
    method_pattern = re.compile(
        # Contiguous `///` lines only, so the comment of an earlier item
        # cannot bleed into this method's comment.
        r'((?:^[ \t]*///.*\n)*)'
        r'(?:^[ \t]*#\[.*\][ \t\r]*\n)*'
        r'^[ \t]*pub fn (\w+)\s*\(([^)]*)\)(?:\s*->\s*([^{]+))?\s*\{',
        re.MULTILINE,
    )
    methods = []
    for doc, name, params, return_type in method_pattern.findall(impl_content):
        doc_lines = [line.strip()[3:].strip() for line in doc.splitlines()]
        comment = '\n'.join(doc_lines).strip()
        methods.append({
            "name": name,
            "comment": comment or None,
            "parameters": parse_parameters(params),
            "return_type": return_type.strip() if return_type else None
        })
    return methods
|
||||
|
||||
def parse_parameters(params_str: str) -> List[Dict[str, Any]]:
    """
    Parse method parameters from a parameter string.

    Args:
        params_str (str): Text found between a function's parentheses.

    Returns:
        List[Dict[str, Any]]: One dictionary per parameter:
            - name: Text before the first ":" (e.g. "x", "&self", "mut buf")
            - type: Text after the first ":", or None for receivers such as
              `self` / `&mut self` that carry no type annotation
    """
    # Split on commas that are not nested inside generics, tuples or arrays,
    # so `HashMap<K, V>` stays in one piece.
    pieces = []
    current = []
    depth = 0
    previous = ''
    for char in params_str:
        if char in '<([{':
            depth += 1
        elif char in ')]}' or (char == '>' and previous != '-'):
            # The ">" of a "->" arrow does not close a generic list.
            depth = max(depth - 1, 0)
        if char == ',' and depth == 0:
            pieces.append(''.join(current))
            current = []
        else:
            current.append(char)
        previous = char
    pieces.append(''.join(current))

    parsed_params = []
    for piece in pieces:
        piece = piece.strip()
        if not piece:
            continue
        # Only the first ":" separates name and type; later ones belong to
        # paths such as `std::io::Error`.
        name, separator, type_text = piece.partition(':')
        parsed_params.append({
            "name": name.strip(),
            "type": type_text.strip() if separator else None
        })
    return parsed_params
|
||||
|
||||
def extract_traits(content: str) -> List[Dict[str, Any]]:
    """
    Extract trait definitions from Rust content.

    Args:
        content (str): Rust file content.

    Returns:
        List[Dict[str, Any]]: One dictionary per `pub trait`:
            - name: Trait name
            - methods: Method list as returned by extract_trait_methods
    """
    def block_body(open_index: int) -> str:
        # Return the text between the "{" at open_index and its matching "}".
        # Braces are counted so default method bodies do not end the trait
        # early; braces in string literals or comments are not special-cased.
        depth = 0
        for index in range(open_index, len(content)):
            char = content[index]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return content[open_index + 1:index]
        return content[open_index + 1:]

    header_pattern = re.compile(r'pub trait (\w+)(?:<.*?>)?\s*\{', re.DOTALL)
    return [
        {
            "name": header.group(1),
            "methods": extract_trait_methods(block_body(header.end() - 1))
        }
        for header in header_pattern.finditer(content)
    ]
|
||||
|
||||
def extract_trait_methods(trait_content: str) -> List[Dict[str, str]]:
    """
    Extract method signatures from a trait definition.

    Both required methods (`fn f(...);`) and methods with a default body
    (`fn f(...) { ... }`) are reported.

    Args:
        trait_content (str): Text between the braces of a trait definition.

    Returns:
        List[Dict[str, str]]: One dictionary per method:
            - name: Method name
            - parameters: Parameter list as returned by parse_parameters
            - return_type: Text after `->`, or None
    """
    # The return type stops at ";" or "{", whichever ends the signature, so a
    # default body is never captured as part of the type.
    method_pattern = r'fn (\w+)\s*\(([^)]*)\)(?:\s*->\s*([^;{]+))?\s*(?:;|\{)'
    methods = re.findall(method_pattern, trait_content)
    return [
        {
            "name": method[0],
            "parameters": parse_parameters(method[1]),
            "return_type": method[2].strip() if method[2] else None
        } for method in methods
    ]
|
||||
|
||||
def extract_functions(content: str) -> List[Dict[str, Any]]:
    """
    Extract `pub fn` definitions from Rust content.

    Args:
        content (str): Rust file content.

    Returns:
        List[Dict[str, Any]]: One dictionary per `pub fn`:
            - name: Function name
            - comment: Text of the `///` lines directly above the function
              (markers removed), or None when it has no doc comment
            - parameters: Parameter list as returned by parse_parameters
            - return_type: Text after `->`, or None
    """
    function_pattern = re.compile(
        # Contiguous `///` lines only, so the doc comment of an earlier item
        # cannot bleed into this function's comment.
        r'((?:^[ \t]*///.*\n)*)'
        r'(?:^[ \t]*#\[.*\][ \t\r]*\n)*'
        r'^[ \t]*pub fn (\w+)\s*\(([^)]*)\)(?:\s*->\s*([^{]+))?\s*\{',
        re.MULTILINE,
    )
    functions = []
    for doc, name, params, return_type in function_pattern.findall(content):
        doc_lines = [line.strip()[3:].strip() for line in doc.splitlines()]
        comment = '\n'.join(doc_lines).strip()
        functions.append({
            "name": name,
            "comment": comment or None,
            "parameters": parse_parameters(params),
            "return_type": return_type.strip() if return_type else None
        })
    return functions
|
||||
|
||||
def convert_to_yaml(rust_data: Dict[str, Any]) -> str:
    """
    Serialise the extracted Rust data as a YAML document.

    The module comment and the `comment` entry of every struct, impl, trait
    and function get each line prefixed with "# " (entries without a comment
    receive `comment: None`); other keys are passed through unchanged.
    """
    def prefix_lines(text: str) -> str:
        return '\n'.join(['# ' + row.strip() for row in text.split('\n')])

    commented_sections = ('structs', 'impls', 'traits', 'functions')

    output = {}
    for section, payload in rust_data.items():
        if section == 'module_comment':
            output['module_comment'] = prefix_lines(payload) if payload else None
        elif section in commented_sections:
            rewritten = []
            for item in payload:
                # Copy the entry so the input data is left untouched.
                entry = dict(item)
                entry['comment'] = prefix_lines(item['comment']) if item.get('comment') else None
                rewritten.append(entry)
            output[section] = rewritten
        else:
            output[section] = payload

    return yaml.dump(output, sort_keys=False, default_flow_style=False)
|
||||
|
||||
def process_directory(source_dir: str, dest_dir: str):
    """
    Walk source_dir recursively and write one YAML file per Rust file.

    The directory layout below source_dir is mirrored below dest_dir; each
    `<name>.rs` file produces `<name>.yaml` in the matching directory.
    """
    for current_dir, _subdirs, filenames in os.walk(source_dir):
        for filename in filenames:
            if not filename.endswith('.rs'):
                continue

            rust_path = os.path.join(current_dir, filename)
            # Mirror the file's position relative to source_dir under dest_dir.
            mirrored_dir = os.path.join(
                dest_dir,
                os.path.dirname(os.path.relpath(rust_path, source_dir)),
            )
            os.makedirs(mirrored_dir, exist_ok=True)

            yaml_content = convert_to_yaml(parse_rust_file(rust_path))

            stem = os.path.splitext(filename)[0]
            with open(os.path.join(mirrored_dir, f"{stem}.yaml"), 'w') as output:
                output.write(yaml_content)
|
||||
|
||||
if __name__ == "__main__":
    # Placeholder locations; edit them before running the script.
    source_directory, destination_directory = (
        "/path/to/rust/source/directory",
        "/path/to/yaml/destination/directory",
    )
    process_directory(source_directory, destination_directory)
    print("Extraction and conversion completed.")
|
191
api/extract_typescript_to_yaml.py
Normal file
191
api/extract_typescript_to_yaml.py
Normal file
|
@ -0,0 +1,191 @@
|
|||
/**
|
||||
* This script extracts information from TypeScript files and converts it to YAML format.
|
||||
* It processes TypeScript files in a given source directory, extracts various components
|
||||
* such as imports, classes, methods, and properties, and then writes the extracted
|
||||
* information to YAML files in a specified destination directory.
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as ts from 'typescript';
|
||||
import * as yaml from 'js-yaml';
|
||||
|
||||
/** Everything extracted from a single TypeScript source file. */
interface FileData {
    // File name up to its first dot.
    name: string;
    // Entries collected from the file's import declarations.
    imports: Import[];
    // Top-level named class declarations found in the file.
    classes: ClassInfo[];
}
|
||||
|
||||
/** One recorded import. */
interface Import {
    // Imported binding name; '*' is used when no individual binding is recorded.
    name: string;
    // Text of the module specifier (the string after `from`).
    source: string;
}
|
||||
|
||||
/** Description of one class declaration. */
interface ClassInfo {
    name: string;
    // Text of the first comment that precedes the class, if any.
    comment?: string;
    // Source text of the types in the class's `extends` clause.
    extends?: string[];
    // Source text of the types in the class's `implements` clause.
    implements?: string[];
    methods: MethodInfo[];
    properties: PropertyInfo[];
}
|
||||
|
||||
/** Description of one method declaration inside a class. */
interface MethodInfo {
    name: string;
    // Text of the first comment that precedes the method, if any.
    comment?: string;
    parameters: ParameterInfo[];
    // Source text of the declared return type; absent when none is written.
    returnType?: string;
}
|
||||
|
||||
/** One parameter of a method. */
interface ParameterInfo {
    name: string;
    // Source text of the declared type; absent when the parameter is untyped.
    type?: string;
}
|
||||
|
||||
/** One property declaration inside a class. */
interface PropertyInfo {
    name: string;
    // Source text of the declared type; absent when none is written.
    type?: string;
    // 'private', 'protected' or 'public' (the value used when no modifier is present).
    visibility: string;
}
|
||||
|
||||
/**
 * Reads a TypeScript file and collects its imports and classes.
 *
 * @param filePath Path of the file to parse.
 * @returns The file's base name (up to the first dot) with the extracted data.
 */
function parseTypeScriptFile(filePath: string): FileData {
    const sourceText = fs.readFileSync(filePath, 'utf-8');
    const parsed = ts.createSourceFile(filePath, sourceText, ts.ScriptTarget.Latest, true);

    return {
        name: path.basename(filePath).split('.')[0],
        imports: extractImports(parsed),
        classes: extractClasses(parsed)
    };
}
|
||||
|
||||
/**
 * Collects the imports declared at the top level of a source file.
 *
 * One entry is produced per imported binding: the default import (if any)
 * and each named import (`import { a, b } from 'x'`). Import declarations
 * that bind no individual name, such as `import * as ns from 'x'`, yield a
 * single entry named '*'. Side-effect imports (`import 'x'`) are skipped.
 *
 * @param sourceFile Parsed source file.
 * @returns The recorded imports in source order.
 */
function extractImports(sourceFile: ts.SourceFile): Import[] {
    const imports: Import[] = [];

    ts.forEachChild(sourceFile, node => {
        if (ts.isImportDeclaration(node)) {
            const importClause = node.importClause;
            const moduleSpecifier = node.moduleSpecifier;

            if (importClause && ts.isStringLiteral(moduleSpecifier)) {
                const source = moduleSpecifier.text;
                const bindings = importClause.namedBindings;
                const names: string[] = [];

                if (importClause.name) {
                    names.push(importClause.name.text);
                }
                if (bindings && ts.isNamedImports(bindings)) {
                    bindings.elements.forEach(element => names.push(element.name.text));
                }
                // Keep the previous '*' placeholder when nothing was named individually.
                if (names.length === 0) {
                    names.push('*');
                }

                names.forEach(name => imports.push({ name, source }));
            }
        }
    });

    return imports;
}
|
||||
|
||||
/**
 * Describes every named class declared at the top level of a source file.
 *
 * @param sourceFile Parsed source file.
 * @returns One ClassInfo per class, in source order.
 */
function extractClasses(sourceFile: ts.SourceFile): ClassInfo[] {
    // Source text of the types listed in the heritage clause with the given keyword.
    const heritageNames = (node: ts.ClassDeclaration, keyword: ts.SyntaxKind): string[] | undefined =>
        node.heritageClauses
            ?.filter(clause => clause.token === keyword)
            .flatMap(clause => clause.types.map(t => t.getText()));

    const classes: ClassInfo[] = [];

    for (const statement of sourceFile.statements) {
        if (!ts.isClassDeclaration(statement) || !statement.name) {
            continue;
        }
        classes.push({
            name: statement.name.text,
            comment: getLeadingCommentText(statement),
            extends: heritageNames(statement, ts.SyntaxKind.ExtendsKeyword),
            implements: heritageNames(statement, ts.SyntaxKind.ImplementsKeyword),
            methods: extractMethods(statement),
            properties: extractProperties(statement)
        });
    }

    return classes;
}
|
||||
|
||||
/**
 * Describes the method declarations of a class.
 *
 * @param classNode Class declaration to inspect.
 * @returns One MethodInfo per method declaration, in source order.
 */
function extractMethods(classNode: ts.ClassDeclaration): MethodInfo[] {
    const collected: MethodInfo[] = [];

    for (const member of classNode.members) {
        if (!ts.isMethodDeclaration(member) || !member.name) {
            continue;
        }
        const declaredReturn = member.type;
        collected.push({
            name: member.name.getText(),
            comment: getLeadingCommentText(member),
            parameters: extractParameters(member),
            returnType: declaredReturn ? declaredReturn.getText() : undefined
        });
    }

    return collected;
}
|
||||
|
||||
/**
 * Lists the parameters of a method.
 *
 * @param method Method declaration to inspect.
 * @returns Name and (when written) type text of every parameter.
 */
function extractParameters(method: ts.MethodDeclaration): ParameterInfo[] {
    const parameters: ParameterInfo[] = [];

    for (const param of method.parameters) {
        const declaredType = param.type;
        parameters.push({
            name: param.name.getText(),
            type: declaredType ? declaredType.getText() : undefined
        });
    }

    return parameters;
}
|
||||
|
||||
/**
 * Describes the property declarations of a class.
 *
 * @param classNode Class declaration to inspect.
 * @returns One PropertyInfo per property declaration, in source order.
 */
function extractProperties(classNode: ts.ClassDeclaration): PropertyInfo[] {
    const collected: PropertyInfo[] = [];

    for (const member of classNode.members) {
        if (!ts.isPropertyDeclaration(member) || !member.name) {
            continue;
        }
        const declaredType = member.type;
        collected.push({
            name: member.name.getText(),
            type: declaredType ? declaredType.getText() : undefined,
            visibility: getVisibility(member)
        });
    }

    return collected;
}
|
||||
|
||||
/**
 * Determines the visibility of a class member from its modifiers.
 *
 * @param node Node whose modifiers are inspected.
 * @returns 'private' or 'protected' when that modifier is present, otherwise 'public'.
 */
function getVisibility(node: ts.Node): string {
    const modifiers = node.modifiers;
    if (!modifiers) {
        return 'public'; // Default visibility in TypeScript
    }

    const has = (kind: ts.SyntaxKind): boolean => modifiers.some(m => m.kind === kind);

    if (has(ts.SyntaxKind.PrivateKeyword)) {
        return 'private';
    }
    if (has(ts.SyntaxKind.ProtectedKeyword)) {
        return 'protected';
    }
    // An explicit `public` modifier and no visibility modifier both mean public.
    return 'public';
}
|
||||
|
||||
/**
 * Returns the text of the first comment in a node's leading trivia.
 *
 * @param node Node whose leading trivia is inspected.
 * @returns The trimmed comment text including its delimiters, or undefined
 *          when the node has no leading comment.
 */
function getLeadingCommentText(node: ts.Node): string | undefined {
    const leadingTrivia = node.getFullText().substring(0, node.getLeadingTriviaWidth());
    const ranges = ts.getLeadingCommentRanges(leadingTrivia, 0);

    if (!ranges || ranges.length === 0) {
        return undefined;
    }

    const first = ranges[0];
    return leadingTrivia.substring(first.pos, first.end).trim();
}
|
||||
|
||||
/**
 * Serialises the extracted file data as YAML, keeping keys in their original order.
 *
 * @param data Extracted file data.
 * @returns YAML text.
 */
function convertToYaml(data: FileData): string {
    const dumpOptions = { sortKeys: false };
    return yaml.dump(data, dumpOptions);
}
|
||||
|
||||
/**
 * Recursively converts every `.ts` file under sourceDir into a `.yaml` file
 * at the same relative location under destDir.
 *
 * @param sourceDir Directory to scan for TypeScript files.
 * @param destDir Directory that receives the YAML files; it is created
 *                (including missing parents) if it does not exist.
 */
function processDirectory(sourceDir: string, destDir: string): void {
    // Create the destination before writing into it. Without this the first
    // write fails when the top-level destination directory does not exist.
    fs.mkdirSync(destDir, { recursive: true });

    fs.readdirSync(sourceDir, { withFileTypes: true }).forEach(entry => {
        const sourcePath = path.join(sourceDir, entry.name);

        if (entry.isDirectory()) {
            // The recursive call creates the matching sub-directory itself.
            processDirectory(sourcePath, path.join(destDir, entry.name));
        } else if (entry.isFile() && entry.name.endsWith('.ts')) {
            const tsData = parseTypeScriptFile(sourcePath);
            const yamlContent = convertToYaml(tsData);
            const yamlPath = path.join(destDir, `${path.parse(entry.name).name}.yaml`);
            fs.writeFileSync(yamlPath, yamlContent);
        }
    });
}
|
||||
|
||||
// Placeholder locations; edit them before running the script.
const [sourceDirectory, destinationDirectory] = [
    '/path/to/typescript/source/directory',
    '/path/to/yaml/destination/directory'
];

processDirectory(sourceDirectory, destinationDirectory);
console.log('Extraction and conversion completed.');
|
Loading…
Reference in a new issue