From f090f59582b58467c6111af503552f89d154a0db Mon Sep 17 00:00:00 2001 From: Patrick Stewart Date: Wed, 25 Sep 2024 13:12:33 -0700 Subject: [PATCH] add: adding scripts for API extraction across various languages --- api/extract_csharp_to_yaml.py | 175 ++++++++++++ api/extract_erlang_to_yaml.py | 127 +++++++++ api/extract_go_to_yaml.py | 198 ++++++++++++++ api/extract_java_to_yaml.py | 171 ++++++++++++ api/extract_javascript_to_yaml.py | 149 +++++++++++ .../extract_laravel_to_yaml.py | 0 api/extract_php_to_yaml.py | 253 ++++++++++++++++++ api/extract_python_to_yaml.py | 165 ++++++++++++ api/extract_rust_to_yaml.py | 215 +++++++++++++++ .../extract_symfony_to_yaml.py | 0 api/extract_typescript_to_yaml.py | 191 +++++++++++++ 11 files changed, 1644 insertions(+) create mode 100644 api/extract_csharp_to_yaml.py create mode 100644 api/extract_erlang_to_yaml.py create mode 100644 api/extract_go_to_yaml.py create mode 100644 api/extract_java_to_yaml.py create mode 100644 api/extract_javascript_to_yaml.py rename extract_laravel_to_yaml.py => api/extract_laravel_to_yaml.py (100%) create mode 100644 api/extract_php_to_yaml.py create mode 100644 api/extract_python_to_yaml.py create mode 100644 api/extract_rust_to_yaml.py rename extract_symfony_to_yaml.py => api/extract_symfony_to_yaml.py (100%) create mode 100644 api/extract_typescript_to_yaml.py diff --git a/api/extract_csharp_to_yaml.py b/api/extract_csharp_to_yaml.py new file mode 100644 index 0000000..d363255 --- /dev/null +++ b/api/extract_csharp_to_yaml.py @@ -0,0 +1,175 @@ +""" +This script extracts information from C# files and converts it to YAML format. +It processes C# files in a given source directory, extracts various components +such as namespaces, classes, properties, methods, and interfaces, and then +writes the extracted information to YAML files in a specified destination directory. 
+""" +import os +import re +import yaml +from typing import Dict, List, Any + +def parse_csharp_file(file_path: str) -> Dict[str, Any]: + """ + Parse a C# file and extract its components. + Args: + file_path (str): Path to the C# file. + Returns: + Dict[str, Any]: A dictionary containing extracted information: + - name: Name of the C# file (without extension) + - namespace: Namespace of the class + - class_comment: Comment for the class (if any) + - using_statements: List of using statements + - properties: List of class properties + - methods: List of class methods + - interfaces: List of interfaces implemented + """ + with open(file_path, 'r') as file: + content = file.read() + + name = os.path.basename(file_path).split('.')[0] + namespace = extract_namespace(content) + class_comment = extract_class_comment(content) + using_statements = extract_using_statements(content) + properties = extract_properties(content) + methods = extract_methods(content) + interfaces = extract_interfaces(content) + + return { + "name": name, + "namespace": namespace, + "class_comment": class_comment, + "using_statements": using_statements, + "properties": properties, + "methods": methods, + "interfaces": interfaces + } + +def extract_namespace(content: str) -> str: + """ + Extract the namespace from C# content. + """ + namespace_pattern = r'namespace\s+([\w.]+)' + match = re.search(namespace_pattern, content) + return match.group(1) if match else "" + +def extract_class_comment(content: str) -> str: + """ + Extract the class-level comment from C# content. + """ + class_comment_pattern = r'/\*\*(.*?)\*/\s*(?:public|internal)?\s*class' + match = re.search(class_comment_pattern, content, re.DOTALL) + return match.group(1).strip() if match else "" + +def extract_using_statements(content: str) -> List[str]: + """ + Extract using statements from C# content. 
+ """ + return re.findall(r'using\s+([\w.]+);', content) + +def extract_properties(content: str) -> List[Dict[str, Any]]: + """ + Extract class properties and their comments from C# content. + """ + property_pattern = r'(?:/\*\*(.*?)\*/\s*)?(public|private|protected|internal)\s+(?:virtual\s+)?(\w+)\s+(\w+)\s*{\s*get;\s*set;\s*}' + properties = re.findall(property_pattern, content, re.DOTALL) + return [ + { + "name": prop[3], + "type": prop[2], + "visibility": prop[1], + "comment": prop[0].strip() if prop[0] else None + } for prop in properties + ] + +def extract_methods(content: str) -> List[Dict[str, Any]]: + """ + Extract class methods and their comments from C# content. + """ + method_pattern = r'(?:/\*\*(.*?)\*/\s*)?(public|private|protected|internal)\s+(?:virtual\s+)?(\w+)\s+(\w+)\s*\((.*?)\)' + methods = re.findall(method_pattern, content, re.DOTALL) + parsed_methods = [] + for method in methods: + parsed_methods.append({ + "name": method[3], + "return_type": method[2], + "visibility": method[1], + "parameters": parse_parameters(method[4]), + "comment": method[0].strip() if method[0] else None + }) + return parsed_methods + +def parse_parameters(params_str: str) -> List[Dict[str, str]]: + """ + Parse method parameters from a parameter string. + """ + params = params_str.split(',') + parsed_params = [] + for param in params: + param = param.strip() + if param: + parts = param.split() + parsed_params.append({"type": parts[0], "name": parts[1]}) + return parsed_params + +def extract_interfaces(content: str) -> List[str]: + """ + Extract interfaces implemented by the class in the C# content. + """ + interface_pattern = r'class\s+\w+\s*:\s*([\w,\s]+)' + match = re.search(interface_pattern, content) + if match: + return [interface.strip() for interface in match.group(1).split(',')] + return [] + +def convert_to_yaml(csharp_data: Dict[str, Any]) -> str: + """ + Convert extracted C# data to YAML format. 
+ """ + def format_comment(comment: str) -> str: + return '\n'.join('# ' + line.strip() for line in comment.split('\n')) + + formatted_data = {} + for key, value in csharp_data.items(): + if key == 'class_comment': + formatted_data['class_comment'] = format_comment(value) if value else None + elif key == 'properties': + formatted_data['properties'] = [ + {**prop, 'comment': format_comment(prop['comment']) if prop['comment'] else None} + for prop in value + ] + elif key == 'methods': + formatted_data['methods'] = [ + {**method, 'comment': format_comment(method['comment']) if method.get('comment') else None} + for method in value + ] + else: + formatted_data[key] = value + + return yaml.dump(formatted_data, sort_keys=False, default_flow_style=False) + +def process_directory(source_dir: str, dest_dir: str): + """ + Process all C# files in the source directory and its subdirectories, + extract information, and save as YAML files in the destination directory. + """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.cs'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + os.makedirs(dest_path, exist_ok=True) + + csharp_data = parse_csharp_file(source_path) + yaml_content = convert_to_yaml(csharp_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/path/to/csharp/source/directory" + destination_directory = "/path/to/yaml/destination/directory" + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/api/extract_erlang_to_yaml.py b/api/extract_erlang_to_yaml.py new file mode 100644 index 0000000..d4effee --- /dev/null +++ b/api/extract_erlang_to_yaml.py @@ -0,0 +1,127 @@ +""" +This script extracts information from 
Erlang files and converts it to YAML format. +It processes Erlang files in a given source directory, extracts various components +such as module name, exports, imports, records, and functions, and then writes the +extracted information to YAML files in a specified destination directory. +""" +import os +import re +import yaml +from typing import Dict, List, Any + +def parse_erlang_file(file_path: str) -> Dict[str, Any]: + """ + Parse an Erlang file and extract its components. + Args: + file_path (str): Path to the Erlang file. + Returns: + Dict[str, Any]: A dictionary containing extracted information: + - name: Name of the Erlang file (without extension) + - module: Module name + - exports: List of exported functions + - imports: List of imported functions + - records: List of record definitions + - functions: List of function definitions + """ + with open(file_path, 'r') as file: + content = file.read() + + name = os.path.basename(file_path).split('.')[0] + module = extract_module(content) + exports = extract_exports(content) + imports = extract_imports(content) + records = extract_records(content) + functions = extract_functions(content) + + return { + "name": name, + "module": module, + "exports": exports, + "imports": imports, + "records": records, + "functions": functions + } + +def extract_module(content: str) -> str: + """Extract the module name from Erlang content.""" + module_pattern = r'-module\(([^)]+)\)' + match = re.search(module_pattern, content) + return match.group(1) if match else "" + +def extract_exports(content: str) -> List[Dict[str, Any]]: + """Extract exported functions from Erlang content.""" + export_pattern = r'-export\(\[(.*?)\]\)' + exports = [] + for match in re.finditer(export_pattern, content): + exports.extend(parse_function_exports(match.group(1))) + return exports + +def parse_function_exports(export_str: str) -> List[Dict[str, Any]]: + """Parse exported function definitions.""" + function_pattern = r'(\w+)/(\d+)' + return 
[{"name": match[0], "arity": int(match[1])} for match in re.findall(function_pattern, export_str)] + +def extract_imports(content: str) -> List[Dict[str, Any]]: + """Extract imported functions from Erlang content.""" + import_pattern = r'-import\(([^,]+),\s*\[(.*?)\]\)' + imports = [] + for match in re.finditer(import_pattern, content): + module = match.group(1) + functions = parse_function_exports(match.group(2)) + imports.append({"module": module, "functions": functions}) + return imports + +def extract_records(content: str) -> List[Dict[str, Any]]: + """Extract record definitions from Erlang content.""" + record_pattern = r'-record\((\w+),\s*\{(.*?)\}\)' + records = [] + for match in re.finditer(record_pattern, content): + name = match.group(1) + fields = [field.strip() for field in match.group(2).split(',')] + records.append({"name": name, "fields": fields}) + return records + +def extract_functions(content: str) -> List[Dict[str, Any]]: + """Extract function definitions from Erlang content.""" + function_pattern = r'(\w+)\((.*?)\)\s*->(.*?)(?=\w+\(|\Z)' + functions = [] + for match in re.finditer(function_pattern, content, re.DOTALL): + name = match.group(1) + params = [param.strip() for param in match.group(2).split(',')] + body = match.group(3).strip() + functions.append({ + "name": name, + "parameters": params, + "body": body + }) + return functions + +def convert_to_yaml(erlang_data: Dict[str, Any]) -> str: + """Convert extracted Erlang data to YAML format.""" + return yaml.dump(erlang_data, sort_keys=False, default_flow_style=False) + +def process_directory(source_dir: str, dest_dir: str): + """ + Process all Erlang files in the source directory and its subdirectories, + extract information, and save as YAML files in the destination directory. 
+ """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.erl'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + os.makedirs(dest_path, exist_ok=True) + + erlang_data = parse_erlang_file(source_path) + yaml_content = convert_to_yaml(erlang_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/path/to/erlang/source/directory" + destination_directory = "/path/to/yaml/destination/directory" + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/api/extract_go_to_yaml.py b/api/extract_go_to_yaml.py new file mode 100644 index 0000000..f545d28 --- /dev/null +++ b/api/extract_go_to_yaml.py @@ -0,0 +1,198 @@ +""" +This script extracts information from Go files and converts it to YAML format. +It processes Go files in a given source directory, extracts various components +such as imports, structs, interfaces, and functions, and then writes the +extracted information to YAML files in a specified destination directory. +""" +import os +import re +import yaml +from typing import Dict, List, Any + +def parse_go_file(file_path: str) -> Dict[str, Any]: + """ + Parse a Go file and extract its components. + Args: + file_path (str): Path to the Go file. 
+ Returns: + Dict[str, Any]: A dictionary containing extracted information: + - name: Name of the Go file (without extension) + - package: Package name + - imports: List of import statements + - structs: List of struct definitions + - interfaces: List of interface definitions + - functions: List of function definitions + """ + with open(file_path, 'r') as file: + content = file.read() + + name = os.path.basename(file_path).split('.')[0] + package = extract_package(content) + imports = extract_imports(content) + structs = extract_structs(content) + interfaces = extract_interfaces(content) + functions = extract_functions(content) + + return { + "name": name, + "package": package, + "imports": imports, + "structs": structs, + "interfaces": interfaces, + "functions": functions + } + +def extract_package(content: str) -> str: + """ + Extract the package name from Go content. + """ + package_pattern = r'package\s+(\w+)' + match = re.search(package_pattern, content) + return match.group(1) if match else "" + +def extract_imports(content: str) -> List[str]: + """ + Extract import statements from Go content. + """ + import_pattern = r'import\s*\((.*?)\)' + match = re.search(import_pattern, content, re.DOTALL) + if match: + imports = re.findall(r'"(.+?)"', match.group(1)) + return imports + return [] + +def extract_structs(content: str) -> List[Dict[str, Any]]: + """ + Extract struct definitions from Go content. + """ + struct_pattern = r'//\s*(.+?)?\n\s*type\s+(\w+)\s+struct\s*{([^}]+)}' + structs = re.findall(struct_pattern, content, re.DOTALL) + return [ + { + "name": struct[1], + "comment": struct[0].strip() if struct[0] else None, + "fields": extract_struct_fields(struct[2]) + } for struct in structs + ] + +def extract_struct_fields(fields_str: str) -> List[Dict[str, str]]: + """ + Extract fields from a struct definition. 
+ """ + field_pattern = r'(\w+)\s+(.+?)(?:`[^`]*`)?$' + return [ + {"name": field[0], "type": field[1].strip()} + for field in re.findall(field_pattern, fields_str, re.MULTILINE) + ] + +def extract_interfaces(content: str) -> List[Dict[str, Any]]: + """ + Extract interface definitions from Go content. + """ + interface_pattern = r'//\s*(.+?)?\n\s*type\s+(\w+)\s+interface\s*{([^}]+)}' + interfaces = re.findall(interface_pattern, content, re.DOTALL) + return [ + { + "name": interface[1], + "comment": interface[0].strip() if interface[0] else None, + "methods": extract_interface_methods(interface[2]) + } for interface in interfaces + ] + +def extract_interface_methods(interface_content: str) -> List[Dict[str, Any]]: + """ + Extract method signatures from an interface definition. + """ + method_pattern = r'(\w+)\((.*?)\)\s*(.*?)(?:\s*//.*)?$' + methods = re.findall(method_pattern, interface_content, re.MULTILINE) + return [ + { + "name": method[0], + "parameters": parse_parameters(method[1]), + "return_type": method[2].strip() if method[2] else None + } for method in methods + ] + +def extract_functions(content: str) -> List[Dict[str, Any]]: + """ + Extract function definitions from Go content. + """ + function_pattern = r'//\s*(.+?)?\n\s*func\s+(\w+)\s*\((.*?)\)\s*(.*?)\s*{' + functions = re.findall(function_pattern, content, re.DOTALL) + return [ + { + "name": function[1], + "comment": function[0].strip() if function[0] else None, + "receiver": extract_receiver(function[2]), + "parameters": parse_parameters(function[2]), + "return_type": function[3].strip() if function[3] else None + } for function in functions + ] + +def extract_receiver(params_str: str) -> Dict[str, str]: + """ + Extract the receiver from a method signature. 
+ """ + receiver_pattern = r'(\w+)\s+\*?(\w+)' + match = re.match(receiver_pattern, params_str) + if match: + return {"name": match.group(1), "type": match.group(2)} + return {} + +def parse_parameters(params_str: str) -> List[Dict[str, str]]: + """ + Parse function parameters from a parameter string. + """ + params = params_str.split(',') + parsed_params = [] + for param in params: + param = param.strip() + if param and not re.match(r'^\w+\s+\*?\w+$', param): # Skip receiver + parts = param.split() + parsed_params.append({"name": parts[0], "type": ' '.join(parts[1:])}) + return parsed_params + +def convert_to_yaml(go_data: Dict[str, Any]) -> str: + """ + Convert extracted Go data to YAML format. + """ + def format_comment(comment: str) -> str: + return '\n'.join('# ' + line.strip() for line in comment.split('\n')) + + formatted_data = {} + for key, value in go_data.items(): + if key in ['structs', 'interfaces', 'functions']: + formatted_data[key] = [ + {**item, 'comment': format_comment(item['comment']) if item.get('comment') else None} + for item in value + ] + else: + formatted_data[key] = value + + return yaml.dump(formatted_data, sort_keys=False, default_flow_style=False) + +def process_directory(source_dir: str, dest_dir: str): + """ + Process all Go files in the source directory and its subdirectories, + extract information, and save as YAML files in the destination directory. 
+ """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.go'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + os.makedirs(dest_path, exist_ok=True) + + go_data = parse_go_file(source_path) + yaml_content = convert_to_yaml(go_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/path/to/go/source/directory" + destination_directory = "/path/to/yaml/destination/directory" + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/api/extract_java_to_yaml.py b/api/extract_java_to_yaml.py new file mode 100644 index 0000000..7073169 --- /dev/null +++ b/api/extract_java_to_yaml.py @@ -0,0 +1,171 @@ +""" +This script extracts information from Java files and converts it to YAML format. +It processes Java files in a given source directory, extracts various components +such as package, imports, class info, fields, methods, and interfaces, and then +writes the extracted information to YAML files in a specified destination directory. +""" +import os +import re +import yaml +from typing import Dict, List, Any + +def parse_java_file(file_path: str) -> Dict[str, Any]: + """ + Parse a Java file and extract its components. + Args: + file_path (str): Path to the Java file. 
+ Returns: + Dict[str, Any]: A dictionary containing extracted information: + - name: Name of the Java file (without extension) + - package: Package declaration + - imports: List of import statements + - class_info: Information about the class (name, modifiers, extends, implements) + - class_comment: Comment for the class (if any) + - fields: List of class fields + - methods: List of class methods + - interfaces: List of interfaces implemented + """ + with open(file_path, 'r') as file: + content = file.read() + + name = os.path.basename(file_path).split('.')[0] + package = extract_package(content) + imports = extract_imports(content) + class_info = extract_class_info(content) + class_comment = extract_class_comment(content) + fields = extract_fields(content) + methods = extract_methods(content) + interfaces = extract_interfaces(content) + + return { + "name": name, + "package": package, + "imports": imports, + "class_info": class_info, + "class_comment": class_comment, + "fields": fields, + "methods": methods, + "interfaces": interfaces + } + +def extract_package(content: str) -> str: + """Extract the package declaration from Java content.""" + package_pattern = r'package\s+([\w.]+);' + match = re.search(package_pattern, content) + return match.group(1) if match else "" + +def extract_imports(content: str) -> List[str]: + """Extract import statements from Java content.""" + import_pattern = r'import\s+([\w.]+);' + return re.findall(import_pattern, content) + +def extract_class_info(content: str) -> Dict[str, Any]: + """Extract class information from Java content.""" + class_pattern = r'(public\s+)?(abstract\s+)?(final\s+)?class\s+(\w+)(\s+extends\s+\w+)?(\s+implements\s+[\w,\s]+)?' 
+ match = re.search(class_pattern, content) + if match: + return { + "name": match.group(4), + "modifiers": [mod for mod in [match.group(1), match.group(2), match.group(3)] if mod], + "extends": match.group(5).split()[-1] if match.group(5) else None, + "implements": match.group(6).split()[-1].split(',') if match.group(6) else [] + } + return {} + +def extract_class_comment(content: str) -> str: + """Extract the class-level comment from Java content.""" + class_comment_pattern = r'/\*\*(.*?)\*/\s*(?:public\s+)?(?:abstract\s+)?(?:final\s+)?class' + match = re.search(class_comment_pattern, content, re.DOTALL) + return match.group(1).strip() if match else "" + +def extract_fields(content: str) -> List[Dict[str, Any]]: + """Extract class fields from Java content.""" + field_pattern = r'(?:/\*\*(.*?)\*/\s*)?(public|protected|private)\s+(?:static\s+)?(?:final\s+)?(\w+)\s+(\w+)(?:\s*=\s*[^;]+)?;' + fields = re.findall(field_pattern, content, re.DOTALL) + return [ + { + "name": field[3], + "type": field[2], + "visibility": field[1], + "comment": field[0].strip() if field[0] else None + } for field in fields + ] + +def extract_methods(content: str) -> List[Dict[str, Any]]: + """Extract class methods from Java content.""" + method_pattern = r'(?:/\*\*(.*?)\*/\s*)?(public|protected|private)\s+(?:static\s+)?(?:\w+\s+)?(\w+)\s+(\w+)\s*\((.*?)\)' + methods = re.findall(method_pattern, content, re.DOTALL) + parsed_methods = [] + for method in methods: + parsed_methods.append({ + "name": method[3], + "return_type": method[2], + "visibility": method[1], + "parameters": parse_parameters(method[4]), + "comment": method[0].strip() if method[0] else None + }) + return parsed_methods + +def parse_parameters(params_str: str) -> List[Dict[str, str]]: + """Parse method parameters from a parameter string.""" + params = params_str.split(',') + parsed_params = [] + for param in params: + param = param.strip() + if param: + parts = param.split() + parsed_params.append({"type": parts[0], "name": 
parts[1]}) + return parsed_params + +def extract_interfaces(content: str) -> List[str]: + """Extract interfaces implemented by the class in the Java content.""" + interface_pattern = r'implements\s+([\w,\s]+)' + match = re.search(interface_pattern, content) + if match: + return [interface.strip() for interface in match.group(1).split(',')] + return [] + +def convert_to_yaml(java_data: Dict[str, Any]) -> str: + """Convert extracted Java data to YAML format.""" + def format_comment(comment: str) -> str: + return '\n'.join('# ' + line.strip() for line in comment.split('\n')) + + formatted_data = {} + for key, value in java_data.items(): + if key == 'class_comment': + formatted_data['class_comment'] = format_comment(value) if value else None + elif key in ['fields', 'methods']: + formatted_data[key] = [ + {**item, 'comment': format_comment(item['comment']) if item.get('comment') else None} + for item in value + ] + else: + formatted_data[key] = value + + return yaml.dump(formatted_data, sort_keys=False, default_flow_style=False) + +def process_directory(source_dir: str, dest_dir: str): + """ + Process all Java files in the source directory and its subdirectories, + extract information, and save as YAML files in the destination directory. 
+ """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.java'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + os.makedirs(dest_path, exist_ok=True) + + java_data = parse_java_file(source_path) + yaml_content = convert_to_yaml(java_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/path/to/java/source/directory" + destination_directory = "/path/to/yaml/destination/directory" + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/api/extract_javascript_to_yaml.py b/api/extract_javascript_to_yaml.py new file mode 100644 index 0000000..362517c --- /dev/null +++ b/api/extract_javascript_to_yaml.py @@ -0,0 +1,149 @@ +""" +This script extracts information from JavaScript files and converts it to YAML format. +It processes JavaScript files in a given source directory, extracts various components +such as imports, classes, properties, and methods, and then writes the extracted +information to YAML files in a specified destination directory. +""" +import os +import re +import yaml +from typing import Dict, List, Any + +def parse_javascript_file(file_path: str) -> Dict[str, Any]: + """ + Parse a JavaScript file and extract its components. + Args: + file_path (str): Path to the JavaScript file. 
+ Returns: + Dict[str, Any]: A dictionary containing extracted information: + - name: Name of the JavaScript file (without extension) + - imports: List of import statements + - class_comment: Comment for the class (if any) + - class_name: Name of the class + - properties: List of class properties + - methods: List of class methods + """ + with open(file_path, 'r') as file: + content = file.read() + + name = os.path.basename(file_path).split('.')[0] + imports = extract_imports(content) + class_comment, class_name = extract_class_info(content) + properties = extract_properties(content) + methods = extract_methods(content) + + return { + "name": name, + "imports": imports, + "class_comment": class_comment, + "class_name": class_name, + "properties": properties, + "methods": methods + } + +def extract_imports(content: str) -> List[str]: + """ + Extract import statements from JavaScript content. + """ + import_pattern = r'import\s+.*?from\s+[\'"].*?[\'"];' + return re.findall(import_pattern, content) + +def extract_class_info(content: str) -> tuple: + """ + Extract class comment and name from JavaScript content. + """ + class_pattern = r'(/\*\*(.*?)\*/\s*)?class\s+(\w+)' + match = re.search(class_pattern, content, re.DOTALL) + if match: + comment = match.group(2).strip() if match.group(2) else "" + name = match.group(3) + return comment, name + return "", "" + +def extract_properties(content: str) -> List[Dict[str, Any]]: + """ + Extract class properties from JavaScript content. + """ + property_pattern = r'(/\*\*(.*?)\*/\s*)?(static\s+)?(\w+)\s*=\s*' + properties = re.findall(property_pattern, content, re.DOTALL) + return [ + { + "name": prop[3], + "static": bool(prop[2]), + "comment": prop[1].strip() if prop[1] else None + } for prop in properties + ] + +def extract_methods(content: str) -> List[Dict[str, Any]]: + """ + Extract class methods from JavaScript content. 
+ """ + method_pattern = r'(/\*\*(.*?)\*/\s*)?(static\s+)?(\w+)\s*\((.*?)\)\s*{' + methods = re.findall(method_pattern, content, re.DOTALL) + parsed_methods = [] + for method in methods: + parsed_methods.append({ + "name": method[3], + "static": bool(method[2]), + "parameters": parse_parameters(method[4]), + "comment": method[1].strip() if method[1] else None + }) + return parsed_methods + +def parse_parameters(params_str: str) -> List[str]: + """ + Parse method parameters from a parameter string. + """ + return [param.strip() for param in params_str.split(',') if param.strip()] + +def convert_to_yaml(js_data: Dict[str, Any]) -> str: + """ + Convert extracted JavaScript data to YAML format. + """ + def format_comment(comment: str) -> str: + return '\n'.join('# ' + line.strip() for line in comment.split('\n')) + + formatted_data = {} + for key, value in js_data.items(): + if key == 'class_comment': + formatted_data['class_comment'] = format_comment(value) if value else None + elif key == 'properties': + formatted_data['properties'] = [ + {**prop, 'comment': format_comment(prop['comment']) if prop['comment'] else None} + for prop in value + ] + elif key == 'methods': + formatted_data['methods'] = [ + {**method, 'comment': format_comment(method['comment']) if method.get('comment') else None} + for method in value + ] + else: + formatted_data[key] = value + + return yaml.dump(formatted_data, sort_keys=False, default_flow_style=False) + +def process_directory(source_dir: str, dest_dir: str): + """ + Process all JavaScript files in the source directory and its subdirectories, + extract information, and save as YAML files in the destination directory. 
+ """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.js'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + os.makedirs(dest_path, exist_ok=True) + + js_data = parse_javascript_file(source_path) + yaml_content = convert_to_yaml(js_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/path/to/javascript/source/directory" + destination_directory = "/path/to/yaml/destination/directory" + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/extract_laravel_to_yaml.py b/api/extract_laravel_to_yaml.py similarity index 100% rename from extract_laravel_to_yaml.py rename to api/extract_laravel_to_yaml.py diff --git a/api/extract_php_to_yaml.py b/api/extract_php_to_yaml.py new file mode 100644 index 0000000..6b46172 --- /dev/null +++ b/api/extract_php_to_yaml.py @@ -0,0 +1,253 @@ +""" +This script extracts information from PHP files and converts it to YAML format. +It processes PHP files in a given source directory, extracts various components +such as dependencies, properties, methods, traits, and interfaces, and then +writes the extracted information to YAML files in a specified destination directory. +""" + +import os +import re +import yaml +from typing import Dict, List, Any + +def parse_php_file(file_path: str) -> Dict[str, Any]: + """ + Parse a PHP file and extract its components. + + Args: + file_path (str): Path to the PHP file. 
# One /** ... */ docblock. The tempered dot stops the capture at the first
# closing "*/", so a docblock can never swallow unrelated code that sits
# between an earlier comment and the declaration being matched.
_PHP_DOCBLOCK = r'/\*\*((?:(?!\*/).)*)\*/'

# First class-like declaration in a file; everything before it is the file
# header (namespace + import statements), everything after it is class body.
_PHP_CLASS_DECL = re.compile(
    r'^[ \t]*(?:(?:abstract|final|readonly)\s+)*(?:class|interface|trait|enum)\s+\w+',
    re.MULTILINE,
)


def _split_at_class_declaration(content: str):
    """Split PHP source into (header, body) at the first class-like declaration."""
    match = _PHP_CLASS_DECL.search(content)
    if match is None:
        return content, ""
    return content[:match.start()], content[match.start():]


def _read_parenthesized(text: str, open_pos: int) -> str:
    """Return the text between the '(' at ``open_pos`` and its matching ')'.

    Nested parentheses and quoted strings are honoured. If the parenthesis is
    never closed, everything after it is returned.
    """
    depth = 0
    quote = None
    i = open_pos
    while i < len(text):
        ch = text[i]
        if quote:
            if ch == '\\':
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in '\'"':
            quote = ch
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return text[open_pos + 1:i]
        i += 1
    return text[open_pos + 1:]


def _split_top_level(text: str, separator: str = ',') -> List[str]:
    """Split ``text`` on ``separator``, ignoring separators inside brackets or quotes."""
    parts = []
    current = []
    depth = 0
    quote = None
    i = 0
    while i < len(text):
        ch = text[i]
        if quote:
            current.append(ch)
            if ch == '\\' and i + 1 < len(text):
                current.append(text[i + 1])
                i += 1
            elif ch == quote:
                quote = None
        elif ch in '\'"':
            quote = ch
            current.append(ch)
        elif ch in '([{':
            depth += 1
            current.append(ch)
        elif ch in ')]}':
            depth -= 1
            current.append(ch)
        elif ch == separator and depth <= 0:
            parts.append(''.join(current))
            current = []
        else:
            current.append(ch)
        i += 1
    parts.append(''.join(current))
    return parts


def parse_php_file(file_path: str) -> Dict[str, Any]:
    """
    Parse a PHP file and extract its components.

    Args:
        file_path (str): Path to the PHP file.

    Returns:
        Dict[str, Any]: A dictionary containing extracted information:
            - name: Name of the PHP file (text before the first dot)
            - class_comment: Comment for the class (if any)
            - dependencies: List of dependencies (use statements)
            - properties: List of class properties
            - methods: List of class methods
            - traits: List of traits used
            - interfaces: List of interfaces implemented
    """
    # Read as UTF-8 regardless of the platform locale; undecodable bytes are
    # replaced so a single odd file cannot abort a whole directory run.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        content = file.read()

    return {
        "name": os.path.basename(file_path).split('.')[0],
        "class_comment": extract_class_comment(content),
        "dependencies": extract_dependencies(content),
        "properties": extract_properties(content),
        "methods": extract_methods(content),
        "traits": extract_traits(content),
        "interfaces": extract_interfaces(content)
    }


def extract_class_comment(content: str) -> str:
    """
    Extract the class-level comment from PHP content.

    Only the docblock that directly precedes the class declaration is
    returned; ``abstract``, ``final`` and ``readonly`` classes are recognised.

    Args:
        content (str): PHP file content.

    Returns:
        str: Extracted class comment or empty string if not found.
    """
    class_comment_pattern = (
        _PHP_DOCBLOCK + r'\s*(?:(?:abstract|final|readonly)\s+)*class\b'
    )
    match = re.search(class_comment_pattern, content, re.DOTALL)
    if match:
        return match.group(1).strip()
    return ""


def extract_dependencies(content: str) -> List[Dict[str, str]]:
    """
    Extract dependencies (namespace ``use`` statements) from PHP content.

    Only the file header is inspected, so trait ``use`` statements inside a
    class body are not reported as dependencies.

    Args:
        content (str): PHP file content.

    Returns:
        List[Dict[str, str]]: List of dictionaries containing dependency information:
            - name: Alias or class name
            - type: "class", or "function" / "const" for ``use function`` / ``use const``
            - source: Full namespace of the dependency
    """
    header, _ = _split_at_class_declaration(content)
    use_pattern = (
        r'(?<![\w$\\])use\s+(?:(function|const)\s+)?([\w\\]+)(?:\s+as\s+(\w+))?\s*;'
    )
    dependencies = []
    for kind, source, alias in re.findall(use_pattern, header):
        dependencies.append({
            "name": alias if alias else source.split('\\')[-1],
            "type": kind if kind else "class",
            "source": source
        })
    return dependencies


def extract_properties(content: str) -> List[Dict[str, Any]]:
    """
    Extract class properties and their comments from PHP content.

    Typed, nullable, ``static`` and ``readonly`` properties are supported.

    Args:
        content (str): PHP file content.

    Returns:
        List[Dict[str, Any]]: List of dictionaries containing property information:
            - name: Property name (without $)
            - visibility: public, protected, or private
            - comment: Property comment (if any)
    """
    property_pattern = (
        r'(?:' + _PHP_DOCBLOCK + r'\s*)?'
        r'(?<![\w$])(public|protected|private)\s+'
        r'(?:(?:static|readonly)\s+)*'
        r'(?:\??[\w\\|&]+\s+)?'          # optional type declaration
        r'(\$\w+)\s*(?:=[^;]*)?;'
    )
    properties = re.findall(property_pattern, content, re.DOTALL)
    return [
        {
            "name": prop[2][1:],
            "visibility": prop[1],
            "comment": prop[0].strip() if prop[0] else None
        } for prop in properties
    ]


def extract_methods(content: str) -> List[Dict[str, Any]]:
    """
    Extract class methods and their comments from PHP content.

    Args:
        content (str): PHP file content.

    Returns:
        List[Dict[str, Any]]: List of dictionaries containing method information:
            - name: Method name
            - visibility: public, protected, or private
            - parameters: List of parameter dictionaries
            - comment: Method comment (if any)
    """
    modifiers = r'(?:(?:abstract|final|static)\s+)*'
    method_head = re.compile(
        r'(?:' + _PHP_DOCBLOCK + r'\s*)?'
        r'(?<![\w$])' + modifiers +
        r'(public|protected|private)\s+' + modifiers +
        r'function\s+&?\s*(\w+)\s*\(',
        re.DOTALL,
    )
    parsed_methods = []
    for match in method_head.finditer(content):
        comment, visibility, name = match.groups()
        # The parameter list is read with a balanced scan because default
        # values such as array() contain parentheses of their own.
        params = _read_parenthesized(content, match.end() - 1)
        parsed_methods.append({
            "name": name,
            "visibility": visibility,
            "parameters": parse_parameters(params),
            "comment": comment.strip() if comment else None
        })
    return parsed_methods


def parse_parameters(params_str: str) -> List[Dict[str, str]]:
    """
    Parse method parameters from a parameter string.

    Commas and ``=`` signs inside default values (arrays, calls, strings) do
    not split a parameter. By-reference (``&``) and variadic (``...``) markers
    are removed from the reported name.

    Args:
        params_str (str): String containing method parameters.

    Returns:
        List[Dict[str, str]]: List of dictionaries containing parameter information:
            - name: Parameter name
            - default: Default value (if specified)
    """
    parsed_params = []
    for param in _split_top_level(params_str):
        param = param.strip()
        if not param:
            continue
        declaration, has_default, default = param.partition('=')
        tokens = declaration.split()
        if not tokens:
            continue
        param_dict = {"name": tokens[-1].lstrip('&.$')}
        if has_default:
            param_dict["default"] = default.strip()
        parsed_params.append(param_dict)
    return parsed_params


def extract_traits(content: str) -> List[str]:
    """
    Extract traits used in the PHP content.

    Only ``use`` statements that appear after the class declaration are
    treated as trait usage, and every trait of a comma-separated list is
    returned.

    Args:
        content (str): PHP file content.

    Returns:
        List[str]: List of trait names used in the class.
    """
    _, body = _split_at_class_declaration(content)
    trait_pattern = r'(?<![\w$\\])use\s+([\w\\]+(?:\s*,\s*[\w\\]+)*)\s*[;{]'
    traits = []
    for names in re.findall(trait_pattern, body):
        traits.extend(name.strip() for name in names.split(','))
    return traits


def extract_interfaces(content: str) -> List[str]:
    """
    Extract interfaces implemented by the class in the PHP content.

    Args:
        content (str): PHP file content.

    Returns:
        List[str]: List of interface names implemented by the class.
    """
    interface_pattern = r'\bimplements\s+([\w\\]+(?:\s*,\s*[\w\\]+)*)'
    interfaces = []
    for names in re.findall(interface_pattern, content):
        interfaces.extend(name.strip() for name in names.split(','))
    return interfaces


def convert_to_yaml(php_data: Dict[str, Any]) -> str:
    """
    Convert extracted PHP data to YAML format.

    Comments are rewritten so that every line starts with "# ".

    Args:
        php_data (Dict[str, Any]): Dictionary containing extracted PHP data.

    Returns:
        str: YAML representation of the PHP data.
    """
    def format_comment(comment: str) -> str:
        return '\n'.join('# ' + line.strip() for line in comment.split('\n'))

    def with_formatted_comment(item: Dict[str, Any]) -> Dict[str, Any]:
        comment = item.get('comment')
        return {**item, 'comment': format_comment(comment) if comment else None}

    formatted_data = {}
    for key, value in php_data.items():
        if key == 'class_comment':
            formatted_data['class_comment'] = format_comment(value) if value else None
        elif key in ('properties', 'methods'):
            formatted_data[key] = [with_formatted_comment(item) for item in value]
        else:
            formatted_data[key] = value

    return yaml.dump(formatted_data, sort_keys=False, default_flow_style=False)


def process_directory(source_dir: str, dest_dir: str):
    """
    Process all PHP files in the source directory and its subdirectories,
    extract information, and save as YAML files in the destination directory.

    The directory layout below ``source_dir`` is mirrored below ``dest_dir``.

    Args:
        source_dir (str): Path to the source directory containing PHP files.
        dest_dir (str): Path to the destination directory for YAML files.
    """
    for root, dirs, files in os.walk(source_dir):
        for file in files:
            if not file.endswith('.php'):
                continue
            source_path = os.path.join(root, file)
            relative_path = os.path.relpath(source_path, source_dir)
            dest_path = os.path.join(dest_dir, os.path.dirname(relative_path))

            os.makedirs(dest_path, exist_ok=True)

            php_data = parse_php_file(source_path)
            yaml_content = convert_to_yaml(php_data)

            yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml")
            with open(yaml_file, 'w', encoding='utf-8') as f:
                f.write(yaml_content)
+ """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.php'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + + os.makedirs(dest_path, exist_ok=True) + + php_data = parse_php_file(source_path) + yaml_content = convert_to_yaml(php_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/home/platform/Devboxes/resources/laravel_framework/src/Illuminate/" + destination_directory = "/home/platform/Devboxes/platform/api/" + + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/api/extract_python_to_yaml.py b/api/extract_python_to_yaml.py new file mode 100644 index 0000000..3c2498f --- /dev/null +++ b/api/extract_python_to_yaml.py @@ -0,0 +1,165 @@ +""" +This script extracts information from Python files and converts it to YAML format. +It processes Python files in a given source directory, extracts various components +such as imports, classes, methods, and properties, and then writes the extracted +information to YAML files in a specified destination directory. +""" +import os +import re +import ast +import yaml +from typing import Dict, List, Any + +def parse_python_file(file_path: str) -> Dict[str, Any]: + """ + Parse a Python file and extract its components. + Args: + file_path (str): Path to the Python file. 
+ Returns: + Dict[str, Any]: A dictionary containing extracted information: + - name: Name of the Python file (without extension) + - class_comment: Comment for the class (if any) + - imports: List of import statements + - classes: List of class information + """ + with open(file_path, 'r') as file: + content = file.read() + + tree = ast.parse(content) + name = os.path.basename(file_path).split('.')[0] + imports = extract_imports(tree) + classes = extract_classes(tree) + + return { + "name": name, + "imports": imports, + "classes": classes + } + +def extract_imports(tree: ast.AST) -> List[Dict[str, str]]: + """ + Extract import statements from Python AST. + Args: + tree (ast.AST): Python abstract syntax tree. + Returns: + List[Dict[str, str]]: List of dictionaries containing import information: + - name: Imported name + - source: Module source + """ + imports = [] + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + imports.append({"name": alias.name, "source": alias.name}) + elif isinstance(node, ast.ImportFrom): + module = node.module + for alias in node.names: + imports.append({"name": alias.name, "source": f"{module}.{alias.name}"}) + return imports + +def extract_classes(tree: ast.AST) -> List[Dict[str, Any]]: + """ + Extract class information from Python AST. + Args: + tree (ast.AST): Python abstract syntax tree. 
+ Returns: + List[Dict[str, Any]]: List of dictionaries containing class information: + - name: Class name + - comment: Class docstring (if any) + - bases: List of base classes + - methods: List of method information + - properties: List of class properties + """ + classes = [] + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef): + class_info = { + "name": node.name, + "comment": ast.get_docstring(node), + "bases": [base.id for base in node.bases if isinstance(base, ast.Name)], + "methods": extract_methods(node), + "properties": extract_properties(node) + } + classes.append(class_info) + return classes + +def extract_methods(class_node: ast.ClassDef) -> List[Dict[str, Any]]: + """ + Extract method information from a class node. + Args: + class_node (ast.ClassDef): Class definition node. + Returns: + List[Dict[str, Any]]: List of dictionaries containing method information: + - name: Method name + - comment: Method docstring (if any) + - parameters: List of parameter names + """ + methods = [] + for node in class_node.body: + if isinstance(node, ast.FunctionDef): + method_info = { + "name": node.name, + "comment": ast.get_docstring(node), + "parameters": [arg.arg for arg in node.args.args if arg.arg != 'self'] + } + methods.append(method_info) + return methods + +def extract_properties(class_node: ast.ClassDef) -> List[Dict[str, str]]: + """ + Extract property information from a class node. + Args: + class_node (ast.ClassDef): Class definition node. 
+ Returns: + List[Dict[str, str]]: List of dictionaries containing property information: + - name: Property name + - type: Property type (if annotated) + """ + properties = [] + for node in class_node.body: + if isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name): + prop_info = { + "name": node.target.id, + "type": ast.unparse(node.annotation) if node.annotation else None + } + properties.append(prop_info) + return properties + +def convert_to_yaml(python_data: Dict[str, Any]) -> str: + """ + Convert extracted Python data to YAML format. + Args: + python_data (Dict[str, Any]): Dictionary containing extracted Python data. + Returns: + str: YAML representation of the Python data. + """ + return yaml.dump(python_data, sort_keys=False, default_flow_style=False) + +def process_directory(source_dir: str, dest_dir: str): + """ + Process all Python files in the source directory and its subdirectories, + extract information, and save as YAML files in the destination directory. + Args: + source_dir (str): Path to the source directory containing Python files. + dest_dir (str): Path to the destination directory for YAML files. 
+ """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.py'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + os.makedirs(dest_path, exist_ok=True) + + python_data = parse_python_file(source_path) + yaml_content = convert_to_yaml(python_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/path/to/python/source/directory" + destination_directory = "/path/to/yaml/destination/directory" + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/api/extract_rust_to_yaml.py b/api/extract_rust_to_yaml.py new file mode 100644 index 0000000..a737eb1 --- /dev/null +++ b/api/extract_rust_to_yaml.py @@ -0,0 +1,215 @@ +""" +This script extracts information from Rust files and converts it to YAML format. +It processes Rust files in a given source directory, extracts various components +such as dependencies, structs, impl blocks, traits, and functions, and then +writes the extracted information to YAML files in a specified destination directory. +""" +import os +import re +import yaml +from typing import Dict, List, Any + +def parse_rust_file(file_path: str) -> Dict[str, Any]: + """ + Parse a Rust file and extract its components. + Args: + file_path (str): Path to the Rust file. 
# Optional run of "///" doc lines followed by optional attribute lines; used
# as a prefix so that a doc comment is only attached to the item it precedes.
_RUST_DOC_PREFIX = r'^((?:[ \t]*///[^\n]*\n)*)(?:[ \t]*#\[[^\n]*\n)*[ \t]*'

_RUST_STRUCT = re.compile(
    _RUST_DOC_PREFIX +
    r'pub(?:\([^)]*\))?\s+struct\s+(\w+)\s*(?:<[^{};]*>)?\s*(?:\bwhere\b[^{;]*)?'
    r'\{([^}]*)\}',
    re.MULTILINE,
)

_RUST_FN_HEAD = re.compile(
    _RUST_DOC_PREFIX +
    r'(pub(?:\([^)]*\))?\s+)?'
    r'(?:(?:default|const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*'
    r'fn\s+(\w+)\s*(?:<[^({;]*>)?\s*\(',
    re.MULTILINE,
)

# What follows the parameter list: optional return type, optional where
# clause, then either "{" (a body) or ";" (a bodiless trait signature).
_RUST_FN_TAIL = re.compile(
    r'\s*(?:->\s*((?:\[[^\]]*\]|[^{;])+?))?\s*(?:\bwhere\b[^{;]*)?([{;])'
)

_RUST_IMPL_HEAD = re.compile(
    r'^[ \t]*(?:unsafe\s+)?impl\b\s*(?:<[^{;]*?>\s*)?'
    r'([\w:]+)(?:<[^{;]*?>)?'
    r'(?:\s+for\s+([\w:]+)(?:<[^{;]*?>)?)?'
    r'\s*(?:\bwhere\b[^{;]*)?\{',
    re.MULTILINE,
)

_RUST_TRAIT_HEAD = re.compile(
    r'^[ \t]*pub(?:\([^)]*\))?\s+(?:unsafe\s+)?trait\s+(\w+)[^{;]*\{',
    re.MULTILINE,
)

_RUST_IMPL_OR_TRAIT_HEAD = re.compile(
    r'^[ \t]*(?:pub(?:\([^)]*\))?\s+)?(?:unsafe\s+)?(?:impl|trait)\b[^{;]*\{',
    re.MULTILINE,
)


def _matching_close(text: str, open_pos: int) -> int:
    """Return the index of the bracket closing the '(' or '{' at ``open_pos``.

    String literals and comments are skipped. ``len(text)`` is returned when
    the bracket is never closed. Character literals are not recognised, so a
    literal such as '{' would be counted as a bracket.
    """
    open_ch = text[open_pos]
    close_ch = ')' if open_ch == '(' else '}'
    depth = 0
    i = open_pos
    n = len(text)
    while i < n:
        ch = text[i]
        if ch == '"':
            i += 1
            while i < n and text[i] != '"':
                i += 2 if text[i] == '\\' else 1
        elif text.startswith('//', i):
            newline = text.find('\n', i)
            i = n if newline == -1 else newline
        elif text.startswith('/*', i):
            end = text.find('*/', i + 2)
            i = n if end == -1 else end + 1
        elif ch == open_ch:
            depth += 1
        elif ch == close_ch:
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return n


def _split_rust_list(text: str) -> List[str]:
    """Split on commas that are not nested inside <>, (), [] or {}."""
    parts = []
    start = 0
    depth = 0
    for i, ch in enumerate(text):
        if ch in '<([{':
            depth += 1
        elif ch in ')]}':
            depth -= 1
        elif ch == '>' and i > 0 and text[i - 1] not in '-=':
            # "->" and "=>" are arrows, not closing angle brackets.
            depth -= 1
        elif ch == ',' and depth <= 0:
            parts.append(text[start:i])
            start = i + 1
    parts.append(text[start:])
    return parts


def _clean_rust_doc(raw: str):
    """Strip the "///" markers from a run of doc lines; None when empty."""
    lines = [re.sub(r'^[ \t]*/// ?', '', line).rstrip() for line in raw.splitlines()]
    text = '\n'.join(lines).strip()
    return text if text else None


def _find_functions(text: str, require_pub: bool) -> List[Dict[str, Any]]:
    """Collect the function signatures declared directly in ``text``.

    Function bodies are skipped, so nested functions and closures are not
    reported. With ``require_pub`` only ``pub`` functions are returned.
    """
    functions = []
    pos = 0
    while True:
        head = _RUST_FN_HEAD.search(text, pos)
        if head is None:
            break
        open_paren = head.end() - 1
        close_paren = _matching_close(text, open_paren)
        tail = _RUST_FN_TAIL.match(text, close_paren + 1)
        if tail is None:
            pos = head.end()
            continue
        if tail.group(2) == '{':
            pos = _matching_close(text, tail.end() - 1) + 1
        else:
            pos = tail.end()
        if require_pub and not head.group(2):
            continue
        return_type = tail.group(1)
        functions.append({
            "name": head.group(3),
            "comment": _clean_rust_doc(head.group(1)),
            "parameters": parse_parameters(text[open_paren + 1:close_paren]),
            "return_type": return_type.strip() if return_type else None
        })
    return functions


def _strip_impl_and_trait_bodies(content: str) -> str:
    """Return ``content`` with every impl and trait block removed."""
    pieces = []
    pos = 0
    while True:
        head = _RUST_IMPL_OR_TRAIT_HEAD.search(content, pos)
        if head is None:
            break
        pieces.append(content[pos:head.start()])
        pos = min(_matching_close(content, head.end() - 1) + 1, len(content))
    pieces.append(content[pos:])
    return ''.join(pieces)


def parse_rust_file(file_path: str) -> Dict[str, Any]:
    """
    Parse a Rust file and extract its components.

    Args:
        file_path (str): Path to the Rust file.

    Returns:
        Dict[str, Any]: A dictionary containing extracted information:
            - name: Name of the Rust file (text before the first dot)
            - module_comment: Comment for the module (if any)
            - dependencies: List of dependencies (use statements)
            - structs: List of struct definitions
            - impls: List of impl blocks
            - traits: List of trait definitions
            - functions: List of standalone functions
    """
    # Rust source files are UTF-8 by definition.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    return {
        "name": os.path.basename(file_path).split('.')[0],
        "module_comment": extract_module_comment(content),
        "dependencies": extract_dependencies(content),
        "structs": extract_structs(content),
        "impls": extract_impls(content),
        "traits": extract_traits(content),
        "functions": extract_functions(content)
    }


def extract_module_comment(content: str) -> str:
    """
    Extract the module-level comment from Rust content.

    Returns the first run of consecutive ``//!`` lines with the markers
    removed, or an empty string when there is none.
    """
    match = re.search(r'^[ \t]*//!.*(?:\n[ \t]*//!.*)*', content, re.MULTILINE)
    if match is None:
        return ""
    lines = [
        re.sub(r'^[ \t]*//! ?', '', line).rstrip()
        for line in match.group(0).split('\n')
    ]
    return '\n'.join(lines).strip()


def extract_dependencies(content: str) -> List[str]:
    """
    Extract dependencies (use statements) from Rust content.

    For grouped (``a::{b, c}``) and glob (``a::*``) imports the common prefix
    is returned; for ``use a::b as c`` the original path ``a::b`` is returned.
    """
    use_pattern = r'\buse\s+([\w:]+?)(?:::\{.*?\}|::\*|\s+as\s+\w+)?\s*;'
    return re.findall(use_pattern, content, re.DOTALL)


def extract_structs(content: str) -> List[Dict[str, Any]]:
    """
    Extract public struct definitions (with named fields) from Rust content.

    Each entry has ``name``, ``comment`` (the doc comment directly above the
    struct, or None) and ``fields``.
    """
    return [
        {
            "name": name,
            "comment": _clean_rust_doc(doc),
            "fields": extract_struct_fields(fields)
        } for doc, name, fields in _RUST_STRUCT.findall(content)
    ]


def extract_struct_fields(fields_str: str) -> List[Dict[str, str]]:
    """
    Extract the public fields from the body of a struct definition.

    Each entry has ``name`` and ``type``; the type is reported without the
    trailing comma or any trailing line comment.
    """
    without_comments = re.sub(r'//[^\n]*', '', fields_str)
    fields = []
    for piece in _split_rust_list(without_comments):
        match = re.search(
            r'\bpub(?:\([^)]*\))?\s+(\w+)\s*:\s*(.+)', piece, re.DOTALL
        )
        if match:
            fields.append({
                "name": match.group(1),
                "type": ' '.join(match.group(2).split())
            })
    return fields


def extract_impls(content: str) -> List[Dict[str, Any]]:
    """
    Extract impl blocks from Rust content.

    Each entry has ``struct`` (the implementing type), ``trait`` (the
    implemented trait, or None for an inherent impl) and ``methods``. Methods
    of trait impls are listed even though they carry no ``pub`` keyword.
    """
    impls = []
    for head in _RUST_IMPL_HEAD.finditer(content):
        first, target = head.group(1), head.group(2)
        open_brace = head.end() - 1
        body = content[open_brace + 1:_matching_close(content, open_brace)]
        impls.append({
            # In "impl Trait for Type" the trait comes first.
            "struct": target if target else first,
            "trait": first if target else None,
            "methods": extract_methods(body, require_pub=not target)
        })
    return impls


def extract_methods(impl_content: str, require_pub: bool = True) -> List[Dict[str, Any]]:
    """
    Extract methods from the body of an impl block.

    Args:
        impl_content (str): Text between the braces of the impl block.
        require_pub (bool): When True (default) only ``pub`` methods are returned.

    Returns:
        List[Dict[str, Any]]: Entries with ``name``, ``comment``,
        ``parameters`` and ``return_type``.
    """
    return _find_functions(impl_content, require_pub)


def parse_parameters(params_str: str) -> List[Dict[str, str]]:
    """
    Parse method parameters from a parameter string.

    Receivers (``self``, ``&self``, ``&mut self``) are reported with name
    ``self`` and the corresponding ``Self`` type. Commas inside generic
    arguments and ``::`` inside type paths do not split a parameter.
    """
    parsed_params = []
    for raw in _split_rust_list(params_str):
        param = ' '.join(raw.split())
        if not param:
            continue
        name, colon, type_ = param.partition(':')
        if colon:
            parsed_params.append({
                "name": re.sub(r'^mut\s+', '', name.strip()),
                "type": type_.strip()
            })
        elif re.search(r'\bself$', param):
            receiver = re.sub(r'^mut\s+', '', param)
            parsed_params.append({
                "name": "self",
                "type": re.sub(r'\bself$', 'Self', receiver)
            })
        else:
            parsed_params.append({"name": param, "type": None})
    return parsed_params


def extract_traits(content: str) -> List[Dict[str, Any]]:
    """
    Extract public trait definitions from Rust content.

    Each entry has ``name`` and ``methods``.
    """
    traits = []
    for head in _RUST_TRAIT_HEAD.finditer(content):
        open_brace = head.end() - 1
        body = content[open_brace + 1:_matching_close(content, open_brace)]
        traits.append({
            "name": head.group(1),
            "methods": extract_trait_methods(body)
        })
    return traits


def extract_trait_methods(trait_content: str) -> List[Dict[str, str]]:
    """
    Extract method signatures from the body of a trait definition.

    Both required methods (ending in ``;``) and provided methods (with a
    default body) are returned, each with ``name``, ``parameters`` and
    ``return_type``.
    """
    return [
        {
            "name": method["name"],
            "parameters": method["parameters"],
            "return_type": method["return_type"]
        } for method in _find_functions(trait_content, False)
    ]


def extract_functions(content: str) -> List[Dict[str, Any]]:
    """
    Extract standalone public functions from Rust content.

    Functions declared inside impl or trait blocks are methods and are not
    returned here.
    """
    return _find_functions(_strip_impl_and_trait_bodies(content), True)


def convert_to_yaml(rust_data: Dict[str, Any]) -> str:
    """
    Convert extracted Rust data to YAML format.

    Comments are rewritten so that every line starts with "# ".
    """
    def format_comment(comment: str) -> str:
        return '\n'.join('# ' + line.strip() for line in comment.split('\n'))

    formatted_data = {}
    for key, value in rust_data.items():
        if key == 'module_comment':
            formatted_data['module_comment'] = format_comment(value) if value else None
        elif key in ('structs', 'impls', 'traits', 'functions'):
            formatted_data[key] = [
                {**item, 'comment': format_comment(item['comment']) if item.get('comment') else None}
                for item in value
            ]
        else:
            formatted_data[key] = value

    return yaml.dump(formatted_data, sort_keys=False, default_flow_style=False)


def process_directory(source_dir: str, dest_dir: str):
    """
    Process all Rust files in the source directory and its subdirectories,
    extract information, and save as YAML files in the destination directory.

    The directory layout below ``source_dir`` is mirrored below ``dest_dir``.
    """
    for root, dirs, files in os.walk(source_dir):
        for file in files:
            if not file.endswith('.rs'):
                continue
            source_path = os.path.join(root, file)
            relative_path = os.path.relpath(source_path, source_dir)
            dest_path = os.path.join(dest_dir, os.path.dirname(relative_path))
            os.makedirs(dest_path, exist_ok=True)

            rust_data = parse_rust_file(source_path)
            yaml_content = convert_to_yaml(rust_data)

            yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml")
            with open(yaml_file, 'w', encoding='utf-8') as f:
                f.write(yaml_content)
+ """ + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.rs'): + source_path = os.path.join(root, file) + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, os.path.dirname(relative_path)) + os.makedirs(dest_path, exist_ok=True) + + rust_data = parse_rust_file(source_path) + yaml_content = convert_to_yaml(rust_data) + + yaml_file = os.path.join(dest_path, f"{os.path.splitext(file)[0]}.yaml") + with open(yaml_file, 'w') as f: + f.write(yaml_content) + +if __name__ == "__main__": + source_directory = "/path/to/rust/source/directory" + destination_directory = "/path/to/yaml/destination/directory" + process_directory(source_directory, destination_directory) + print("Extraction and conversion completed.") diff --git a/extract_symfony_to_yaml.py b/api/extract_symfony_to_yaml.py similarity index 100% rename from extract_symfony_to_yaml.py rename to api/extract_symfony_to_yaml.py diff --git a/api/extract_typescript_to_yaml.py b/api/extract_typescript_to_yaml.py new file mode 100644 index 0000000..c47e751 --- /dev/null +++ b/api/extract_typescript_to_yaml.py @@ -0,0 +1,191 @@ +/** + * This script extracts information from TypeScript files and converts it to YAML format. + * It processes TypeScript files in a given source directory, extracts various components + * such as imports, classes, methods, and properties, and then writes the extracted + * information to YAML files in a specified destination directory. 
+ */ + +import * as fs from 'fs'; +import * as path from 'path'; +import * as ts from 'typescript'; +import * as yaml from 'js-yaml'; + +interface FileData { + name: string; + imports: Import[]; + classes: ClassInfo[]; +} + +interface Import { + name: string; + source: string; +} + +interface ClassInfo { + name: string; + comment?: string; + extends?: string[]; + implements?: string[]; + methods: MethodInfo[]; + properties: PropertyInfo[]; +} + +interface MethodInfo { + name: string; + comment?: string; + parameters: ParameterInfo[]; + returnType?: string; +} + +interface ParameterInfo { + name: string; + type?: string; +} + +interface PropertyInfo { + name: string; + type?: string; + visibility: string; +} + +function parseTypeScriptFile(filePath: string): FileData { + const content = fs.readFileSync(filePath, 'utf-8'); + const sourceFile = ts.createSourceFile(filePath, content, ts.ScriptTarget.Latest, true); + + const name = path.basename(filePath).split('.')[0]; + const imports = extractImports(sourceFile); + const classes = extractClasses(sourceFile); + + return { name, imports, classes }; +} + +function extractImports(sourceFile: ts.SourceFile): Import[] { + const imports: Import[] = []; + + ts.forEachChild(sourceFile, node => { + if (ts.isImportDeclaration(node)) { + const importClause = node.importClause; + const moduleSpecifier = node.moduleSpecifier; + + if (importClause && ts.isStringLiteral(moduleSpecifier)) { + const name = importClause.name?.text ?? 
'*'; + const source = moduleSpecifier.text; + imports.push({ name, source }); + } + } + }); + + return imports; +} + +function extractClasses(sourceFile: ts.SourceFile): ClassInfo[] { + const classes: ClassInfo[] = []; + + ts.forEachChild(sourceFile, node => { + if (ts.isClassDeclaration(node) && node.name) { + const classInfo: ClassInfo = { + name: node.name.text, + comment: getLeadingCommentText(node), + extends: node.heritageClauses?.filter(clause => clause.token === ts.SyntaxKind.ExtendsKeyword) + .flatMap(clause => clause.types.map(t => t.getText())), + implements: node.heritageClauses?.filter(clause => clause.token === ts.SyntaxKind.ImplementsKeyword) + .flatMap(clause => clause.types.map(t => t.getText())), + methods: extractMethods(node), + properties: extractProperties(node) + }; + classes.push(classInfo); + } + }); + + return classes; +} + +function extractMethods(classNode: ts.ClassDeclaration): MethodInfo[] { + const methods: MethodInfo[] = []; + + classNode.members.forEach(member => { + if (ts.isMethodDeclaration(member) && member.name) { + const methodInfo: MethodInfo = { + name: member.name.getText(), + comment: getLeadingCommentText(member), + parameters: extractParameters(member), + returnType: member.type ? member.type.getText() : undefined + }; + methods.push(methodInfo); + } + }); + + return methods; +} + +function extractParameters(method: ts.MethodDeclaration): ParameterInfo[] { + return method.parameters.map(param => ({ + name: param.name.getText(), + type: param.type ? param.type.getText() : undefined + })); +} + +function extractProperties(classNode: ts.ClassDeclaration): PropertyInfo[] { + const properties: PropertyInfo[] = []; + + classNode.members.forEach(member => { + if (ts.isPropertyDeclaration(member) && member.name) { + const propertyInfo: PropertyInfo = { + name: member.name.getText(), + type: member.type ? 
member.type.getText() : undefined, + visibility: getVisibility(member) + }; + properties.push(propertyInfo); + } + }); + + return properties; +} + +function getVisibility(node: ts.Node): string { + if (node.modifiers) { + if (node.modifiers.some(m => m.kind === ts.SyntaxKind.PrivateKeyword)) return 'private'; + if (node.modifiers.some(m => m.kind === ts.SyntaxKind.ProtectedKeyword)) return 'protected'; + if (node.modifiers.some(m => m.kind === ts.SyntaxKind.PublicKeyword)) return 'public'; + } + return 'public'; // Default visibility in TypeScript +} + +function getLeadingCommentText(node: ts.Node): string | undefined { + const fullText = node.getFullText(); + const trivia = fullText.substring(0, node.getLeadingTriviaWidth()); + const commentRanges = ts.getLeadingCommentRanges(trivia, 0); + + if (commentRanges && commentRanges.length > 0) { + return trivia.substring(commentRanges[0].pos, commentRanges[0].end).trim(); + } + + return undefined; +} + +function convertToYaml(data: FileData): string { + return yaml.dump(data, { sortKeys: false }); +} + +function processDirectory(sourceDir: string, destDir: string): void { + fs.readdirSync(sourceDir, { withFileTypes: true }).forEach(entry => { + const sourcePath = path.join(sourceDir, entry.name); + const destPath = path.join(destDir, entry.name); + + if (entry.isDirectory()) { + fs.mkdirSync(destPath, { recursive: true }); + processDirectory(sourcePath, destPath); + } else if (entry.isFile() && entry.name.endsWith('.ts')) { + const tsData = parseTypeScriptFile(sourcePath); + const yamlContent = convertToYaml(tsData); + const yamlPath = path.join(destDir, `${path.parse(entry.name).name}.yaml`); + fs.writeFileSync(yamlPath, yamlContent); + } + }); +} + +const sourceDirectory = '/path/to/typescript/source/directory'; +const destinationDirectory = '/path/to/yaml/destination/directory'; + +processDirectory(sourceDirectory, destinationDirectory); +console.log('Extraction and conversion completed.');