diff --git a/README.md b/README.md new file mode 100644 index 0000000..865cecb --- /dev/null +++ b/README.md @@ -0,0 +1,133 @@ +# jvm-rs + +A Java Virtual Machine (JVM) implementation written in Rust, capable of parsing and executing Java class files and bytecode. + +## Overview + +jvm-rs is an educational/experimental JVM implementation that demonstrates the core components and execution model of the Java Virtual Machine. The project uses Rust's type safety and modern tooling to build a simplified but functional JVM interpreter. + +## Features + +### Currently Implemented + +- **Class File Parsing**: Full support for reading and deserializing binary Java class files (`.class`) +- **Constant Pool Management**: Handles 20+ constant pool entry types (UTF8, Integer, Float, Long, Double, Class, String, MethodRef, FieldRef, etc.) +- **Dynamic Class Loading**: On-demand class loading with superclass and interface resolution +- **Class Initialization**: Automatic `<clinit>` method execution during class initialization +- **Bytecode Execution**: Interpreter for JVM bytecode instructions including: + - Constant loading (`ldc`, `ldc2_w`) + - Load/store operations (`fload`, `dload`, `fstore`, `dstore`, etc.) + - Type conversions (`f2d`) + - Arithmetic operations (`dadd`) + - Field access (`getstatic`) + - Method invocation (`invokevirtual`, `invokestatic`) + - Control flow (`return_void`) +- **Module System**: Support for loading classes from 7z binary image archives +- **Frame-based Execution**: Proper execution context with program counter, operand stack, and local variables + +### In Development + +Many bytecode instructions and JVM features are planned but not yet implemented: +- Most bytecode operations (array operations, object creation, exception handling, etc.) 
+- Complete object model and field access +- Method resolution and invoke dynamic +- Exception handling and throw/catch +- Type checking and validation +- Garbage collection +- JNI/Native interface support + +## Architecture + +### Core Components + +- **`Vm`** (`vm.rs`): Main virtual machine controller managing threads and class loader +- **`VmThread`** (`thread.rs`): Thread of execution managing the frame stack and method invocation +- **`Frame`** (`lib.rs`): Execution context for a method with PC, operand stack, and local variables +- **`ClassLoader`** (`class_loader.rs`): Handles dynamic class loading, linking, and initialization +- **`RuntimeClass`** (`class.rs`): Runtime representation of a loaded class +- **`ClassFile`** (`class_file/`): Binary parser for Java class files using the `deku` library +- **`ConstantPool`** (`class_file/constant_pool.rs`): Constant pool resolution and management +- **`Object`** (`object.rs`): Runtime representation of Java objects + +### Execution Flow + +1. **Loading**: `ClassFile::from_bytes()` parses binary class file data +2. **Resolution**: `ClassLoader` converts `ClassFile` to `RuntimeClass`, resolving dependencies +3. **Execution**: `VmThread` invokes the main method, creating a `Frame` +4. **Interpretation**: `Frame` iterates through bytecode operations, executing each instruction +5. 
**Stack Operations**: Instructions manipulate the operand stack and local variables + +## Usage + +```rust +use jvm_rs::{Vm, VmError}; + +fn main() -> Result<(), VmError> { + // Initialize the VM + let vm = Vm::new(); + + // Execute a main method + vm.run_main("path/to/ClassFile.class")?; + + Ok(()) +} +``` + +## Dependencies + +- **`deku`**: Binary parsing and serialization for class files +- **`log`** / **`env_logger`**: Logging infrastructure +- **`itertools`**: Iterator utilities +- **`sevenz-rust2`**: 7z archive reading for module system support + +## Project Structure + +``` +jvm-rs/ +├── src/ +│ ├── lib.rs # Frame and Value definitions +│ ├── main.rs # Entry point +│ ├── vm.rs # Virtual Machine implementation +│ ├── thread.rs # Thread execution management +│ ├── class.rs # RuntimeClass definition +│ ├── class_loader.rs # ClassLoader implementation +│ ├── class_file/ +│ │ ├── class_file.rs # ClassFile parser (magic 0xCAFEBABE) +│ │ └── constant_pool.rs # Constant pool types +│ ├── attributes.rs # Bytecode operations and attributes +│ ├── macros.rs # Helper macros +│ ├── object.rs # Object representation +│ ├── bimage.rs # Binary image (7z) reader +│ └── rng.rs # RNG utilities +├── data/ # Test class files +└── lib/ # Module library (7z archives) +``` + +## Building + +```bash +# Build the project +cargo build + +# Build with optimizations +cargo build --release + +# Run tests +cargo test + +# Run with logging +RUST_LOG=debug cargo run +``` + +## Current Status + +This project is in early development (v0.1.0). The core infrastructure for class loading and basic bytecode execution is in place, but many JVM features remain unimplemented. Contributions and experimentation are welcome! 
+ +## License + +[Add your license here] + +## References + +- [JVM Specification](https://docs.oracle.com/javase/specs/jvms/se21/html/index.html) +- [Java Class File Format](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html) \ No newline at end of file diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..75c686c --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,79 @@ +hard_tabs = true + +newline_style = "Auto" +indent_style = "Block" +use_small_heuristics = "Default" +fn_call_width = 60 +attr_fn_like_width = 70 +struct_lit_width = 18 +struct_variant_width = 35 +array_width = 60 +chain_width = 60 +single_line_if_else_max_width = 50 +single_line_let_else_max_width = 50 +wrap_comments = false +format_code_in_doc_comments = false +doc_comment_code_block_width = 100 +comment_width = 80 +normalize_comments = false +normalize_doc_attributes = false +format_strings = false +format_macro_matchers = false +format_macro_bodies = true +skip_macro_invocations = [] +hex_literal_case = "Preserve" +empty_item_single_line = true +struct_lit_single_line = true +fn_single_line = false +where_single_line = false +imports_indent = "Block" +imports_layout = "Mixed" +imports_granularity = "Preserve" +group_imports = "Preserve" +reorder_imports = true +reorder_modules = true +reorder_impl_items = false +type_punctuation_density = "Wide" +space_before_colon = false +space_after_colon = true +spaces_around_ranges = false +binop_separator = "Front" +remove_nested_parens = true +combine_control_expr = true +short_array_element_width_threshold = 10 +overflow_delimited_expr = false +struct_field_align_threshold = 0 +enum_discrim_align_threshold = 0 +match_arm_blocks = true +match_arm_leading_pipes = "Never" +force_multiline_blocks = false +fn_params_layout = "Tall" +brace_style = "SameLineWhere" +control_brace_style = "AlwaysSameLine" +trailing_semicolon = true +trailing_comma = "Vertical" +match_block_trailing_comma = false +blank_lines_upper_bound = 1 
+blank_lines_lower_bound = 0 +edition = "2015" +style_edition = "2015" +version = "One" +inline_attribute_width = 0 +format_generated_files = true +generated_marker_line_search_limit = 5 +merge_derives = true +use_try_shorthand = false +use_field_init_shorthand = false +force_explicit_abi = true +condense_wildcard_suffixes = false +color = "Auto" +required_version = "1.8.0" +unstable_features = false +disable_all_formatting = false +skip_children = false +show_parse_errors = true +error_on_line_overflow = false +error_on_unformatted = false +ignore = [] +emit_mode = "Files" +make_backup = false diff --git a/src/attributes.rs b/src/attributes.rs index ee1a71c..04ba42b 100644 --- a/src/attributes.rs +++ b/src/attributes.rs @@ -1,8 +1,10 @@ use std::fmt::Display; +use std::ops::Deref; use deku_derive::DekuRead; use deku::DekuContainerRead; use log::trace; -use crate::class_file::{ClassFile, CpInfo}; +use crate::class_file::{ClassFile, Constant, ConstantPoolEntry}; +use crate::class_file::constant_pool::ConstantPoolExt; #[derive(Clone, PartialEq, Debug, DekuRead)] #[deku(ctx = "_endian: deku::ctx::Endian", endian = "big")] @@ -18,7 +20,7 @@ pub struct AttributeInfo { #[derive(Clone, PartialEq, Debug)] pub enum Attribute { // "Critical" - ConstantValue, + ConstantValue(Constant), Code(CodeAttribute), StackMapTable(Vec), BootstrapMethods, @@ -357,10 +359,14 @@ pub enum Ops { // control #[deku(id = 0xa7)] goto(u16), + + // discontinued #[deku(id = 0xa8)] jsr(u16), #[deku(id = 0xa9)] ret(u8), + // + #[deku(id = 0xaa)] tableswitch, #[deku(id = 0xab)] @@ -441,64 +447,67 @@ pub enum Ops { // } impl AttributeInfo { - pub fn parse_attribute(&self, constant_pool: &[CpInfo]) -> Option { - let name = crate::class_file::pool_get_string(constant_pool, self.attribute_name_index)?; - trace!("Parsing attribute with name: {}", name); - - - match name.as_ref() { - "Code" => { - let (_, mut code_attr) = CodeAttribute::from_bytes((&self.info.as_slice(), 0)).ok()?; - // Recursively 
interpret nested attributes - for attr in &mut code_attr.attributes { - attr.interpreted = attr.parse_attribute(constant_pool); - } - Some(Attribute::Code(code_attr)) - } - "SourceFile" => { - if self.info.len() >= 2 { - let source_file_index = u16::from_be_bytes([self.info[0], self.info[1]]); - Some(Attribute::SourceFile(source_file_index)) - } else { - None - } - } - "LineNumberTable" => { - let (_, lnt) = LineNumberTableAttribute::from_bytes((&self.info.as_slice(), 0)).ok()?; - Some(Attribute::LineNumberTable(lnt)) - } - "StackMapTable" => { - Some(Attribute::StackMapTable(self.info.clone())) - } - "Exceptions" => { - Some(Attribute::Exceptions(self.info.clone())) - } - "InnerClasses" => { - Some(Attribute::InnerClasses(self.info.clone())) - } - "Signature" => { - if self.info.len() >= 2 { - let signature_index = u16::from_be_bytes([self.info[0], self.info[1]]); - Some(Attribute::Signature(signature_index)) - } else { - None - } - } - "LocalVariableTable" => { - let (_, lvt) = LocalVariableTableAttribute::from_bytes((&self.info.as_slice(), 0)).ok()?; - Some(Attribute::LocalVariableTable(lvt)) - } - _ => Some(Attribute::Unknown(name.to_string(), self.info.clone())), - } - } + // pub fn parse_attribute(&self, constant_pool: &[ConstantPoolEntry]) -> Option { + // let name = crate::class_file::pool_get_string(constant_pool, self.attribute_name_index)?; + // trace!("Parsing attribute with name: {}", name); + // + // + // match name.as_ref() { + // "Code" => { + // let (_, mut code_attr) = CodeAttribute::from_bytes((&self.info.as_slice(), 0)).ok()?; + // // Recursively interpret nested attributes + // for attr in &mut code_attr.attributes { + // attr.interpreted = attr.parse_attribute(constant_pool); + // } + // Some(Attribute::Code(code_attr)) + // } + // "SourceFile" => { + // if self.info.len() >= 2 { + // let source_file_index = u16::from_be_bytes([self.info[0], self.info[1]]); + // Some(Attribute::SourceFile(source_file_index)) + // } else { + // None + // } + // 
} + // "LineNumberTable" => { + // let (_, lnt) = LineNumberTableAttribute::from_bytes((&self.info.as_slice(), 0)).ok()?; + // Some(Attribute::LineNumberTable(lnt)) + // } + // "StackMapTable" => { + // Some(Attribute::StackMapTable(self.info.clone())) + // } + // "Exceptions" => { + // Some(Attribute::Exceptions(self.info.clone())) + // } + // "InnerClasses" => { + // Some(Attribute::InnerClasses(self.info.clone())) + // } + // "Signature" => { + // if self.info.len() >= 2 { + // let signature_index = u16::from_be_bytes([self.info[0], self.info[1]]); + // Some(Attribute::Signature(signature_index)) + // } else { + // None + // } + // } + // "LocalVariableTable" => { + // let (_, lvt) = LocalVariableTableAttribute::from_bytes((&self.info.as_slice(), 0)).ok()?; + // Some(Attribute::LocalVariableTable(lvt)) + // } + // _ => Some(Attribute::Unknown(name.to_string(), self.info.clone())), + // } + // } /// Get the interpreted attribute, parsing if necessary pub fn get(&self, class_file: &ClassFile) -> Option { - if let Some(ref attr) = self.interpreted { - Some(attr.clone()) - } else { - self.parse_attribute(class_file.constant_pool.as_ref()) - } + class_file.constant_pool.parse_attribute(self.deref().clone()).ok() + + + // if let Some(ref attr) = self.interpreted { + // Some(attr.clone()) + // } else { + // self.parse_attribute(class_file.constant_pool.as_ref()) + // } } } diff --git a/src/bimage.rs b/src/bimage.rs index e0bedfb..61c3e21 100644 --- a/src/bimage.rs +++ b/src/bimage.rs @@ -6,53 +6,55 @@ const DEFAULT_LOCATION: &str = "./lib/modules"; pub struct Bimage { - image: ArchiveReader, - modules: Vec, + image: ArchiveReader, + modules: Vec, } impl Default for Bimage { - fn default() -> Self { - let reader = ArchiveReader::open(DEFAULT_LOCATION, Default::default()).expect("No image location given, and unable to open/locate default image"); + fn default() -> Self { + let reader = ArchiveReader::open(DEFAULT_LOCATION, Default::default()).expect("No image location 
given, and unable to open/locate default image"); - let mut modules = reader.archive().files.iter().filter(|e|{ - e.is_directory && - e.name.split("/").count() == 1 - }).map(|e| { e.name.clone() }).collect::>().into_iter().collect::>(); - modules.sort(); - Self { - image: reader, - modules, - } - } + let mut modules = reader.archive().files.iter().filter(|e|{ + e.is_directory && + e.name.split("/").count() == 1 + }).map(|e| { e.name.clone() }).collect::>().into_iter().collect::>(); + modules.sort(); + Self { + image: reader, + modules, + } + } } impl Bimage { - pub fn new(path: impl AsRef) -> Self { - let reader = ArchiveReader::open(path, Default::default()).expect("Unable to find specified bimage."); - Self { - image: reader, - ..Default::default() - } - } + pub fn new(path: impl AsRef) -> Self { + let reader = ArchiveReader::open(path, Default::default()).expect("Unable to find specified bimage."); + Self { + image: reader, + ..Default::default() + } + } - fn resolve_path(module: &str, class: &str) -> String { - let module = if module.is_empty() { "java.base" } else { module }; - let class = Self::d2s(class); - format!("{module}/classes/{class}.class") - } - fn d2s( dots: &str) -> String { - dots.replace(".", "/") - } - fn f2s ( slashes: &str) -> String { - slashes.replace("/", ".") - } + fn resolve_path(module: &str, class: &str) -> String { + let module = if module.is_empty() { "java.base" } else { module }; + let class = Self::d2s(class); + format!("{module}/classes/{class}.class") + } + fn d2s( dots: &str) -> String { + dots.replace(".", "/") + } + fn f2s ( slashes: &str) -> String { + slashes.replace("/", ".") + } - pub fn get_class(&mut self, module: &str, class: &str) -> Vec { - let path = Self::resolve_path(module, class); - self.image.read_file(&path).unwrap() - } + pub fn get_class(&mut self, module: &str, class: &str) -> Option> { + let path = Self::resolve_path(module, class); + self.image.read_file(&path).map_err(|e| { + println!("{}", path); + 
}).ok() + } } \ No newline at end of file diff --git a/src/class.rs b/src/class.rs index 428190a..41fef98 100644 --- a/src/class.rs +++ b/src/class.rs @@ -1,38 +1,75 @@ -use std::sync::Arc; use crate::attributes::AttributeInfo; -use crate::class_file::{ClassFlags, CpInfo, FieldData, FieldInfo, MethodInfo, MethodData, ClassFile}; +use crate::class_file::{ + ClassFile, ClassFlags, ConstantPoolEntry, FieldData, FieldInfo, FieldRef, MethodData, + MethodInfo, MethodRef, +}; +use crate::{FieldType, MethodDescriptor, VmError}; +use std::sync::{Arc, Mutex}; +use std::thread::ThreadId; + +/// JVM Spec 5.5: Initialization states for a class +#[derive(Debug, Clone, PartialEq)] +pub enum InitState { + /// Verified and prepared but not initialized + NotInitialized, + /// Being initialized by a specific thread + Initializing(ThreadId), + /// Fully initialized and ready for use + Initialized, + /// Initialization failed + Error(String), +} pub struct RuntimeClass { - pub constant_pool: Arc>, + pub constant_pool: Arc>, pub access_flags: ClassFlags, pub this_class: String, - pub super_class: Arc, + pub super_class: Option>, pub interfaces: Vec>, pub fields: Vec, pub methods: Vec, + /// Thread-safe initialization state (JVM Spec 5.5) + pub init_state: Mutex, } -impl From for RuntimeClass { - fn from(value: ClassFile) -> Self { - let constant_pool = value.constant_pool.clone(); - let access_flags = ClassFlags::from(value.access_flags); +impl RuntimeClass { + pub fn find_method(&self, name: &str, desc: MethodDescriptor) -> Result<&MethodData, VmError> { + println!("Finding method"); + if let Some(method) = self.methods.iter().find(|e| { + println!("Method Name Needed: {name}, Checked:{}", e.name); + println!("Method type Needed: {desc:?}, Checked:{:?}", e.desc); + let name_match = e.name.eq(name); + let param_match = desc.parameters == e.desc.parameters; + name_match && param_match + }) { + return Ok(method); + }; - - - - - - - - - Self { - constant_pool, - access_flags, - this_class: 
"".to_string(), - super_class: Arc::new(RuntimeClass {}), - interfaces: vec![], - fields: vec![], - methods: vec![], + // recurse super class + if let Some(super_class) = &self.super_class { + return super_class.find_method(name, desc); } + // No method found, and we must be Object, as we don't have a superclass + Err(VmError::LoaderError("Failed to find method".to_string())) } -} \ No newline at end of file + + pub fn find_field(&self, name: &str, desc: FieldType) -> Result<&FieldData, VmError> { + println!("Finding field"); + if let Some(field) = self.fields.iter().find(|e| { + println!("Field Name Needed: {name}, Checked:{}", e.name); + println!("Field type Needed: {desc:?}, Checked:{:?}", e.desc); + let name_match = e.name.eq(name); + let type_match = desc == e.desc; + name_match && type_match + }) { + return Ok(field); + }; + + // recurse super class + if let Some(super_class) = &self.super_class { + return super_class.find_field(name, desc); + } + // No field found, and we must be Object, as we don't have a superclass + Err(VmError::LoaderError("Failed to find field".to_string())) + } +} diff --git a/src/class_file/class_file.rs b/src/class_file/class_file.rs index 355e7a9..c073848 100644 --- a/src/class_file/class_file.rs +++ b/src/class_file/class_file.rs @@ -1,14 +1,16 @@ +use crate::attributes::{Attribute, AttributeInfo, CodeAttribute, Ops}; +use crate::class_file::constant_pool::{ConstantPoolError, ConstantPoolExt, ConstantPoolOwned}; +use crate::{BaseType, FieldType, MethodDescriptor, Value}; use deku::ctx::Endian::Big; +use deku::{DekuContainerRead, DekuError}; +use deku_derive::{DekuRead, DekuWrite}; +use itertools::Itertools; use std::borrow::Cow; use std::fmt; +use std::fmt::{Display, Formatter}; use std::ops::Deref; use std::str::Chars; use std::sync::Arc; -use itertools::Itertools; -use deku_derive::{DekuRead, DekuWrite}; -use deku::{DekuContainerRead, DekuError}; -use crate::attributes::{Attribute, AttributeInfo, CodeAttribute, Ops}; -use 
crate::{BaseType, FieldType, MethodDescriptor, Value}; #[derive(Debug, PartialEq, DekuRead)] #[deku(magic = b"\xCA\xFE\xBA\xBE", endian = "big")] @@ -17,10 +19,10 @@ pub struct ClassFile { pub major_version: u16, constant_pool_count: u16, #[deku( - until = "CpInfo::weighted_count(*constant_pool_count - 1)", - map = "|v: Vec| -> Result<_, DekuError> { Ok(Arc::new(v)) }" + until = "ConstantPoolEntry::weighted_count(*constant_pool_count - 1)", + map = "|v: Vec| -> Result<_, DekuError> { Ok(Arc::new(v)) }" )] - pub constant_pool: Arc>, + pub constant_pool: Arc, pub access_flags: u16, pub this_class: u16, pub super_class: u16, @@ -40,7 +42,7 @@ pub struct ClassFile { #[derive(Clone, PartialEq, Debug, DekuRead)] #[deku(id_type = "u8", ctx = "_endian: deku::ctx::Endian", endian = "big")] -pub enum CpInfo { +pub enum ConstantPoolEntry { #[deku(id = 0x01)] Utf8(ConstantUtf8Info), #[deku(id = 0x03)] @@ -74,15 +76,15 @@ pub enum CpInfo { #[deku(id = 19)] Module(ConstantModuleInfo), #[deku(id = 20)] - Package(ConstantPackageInfo) + Package(ConstantPackageInfo), } -impl CpInfo { +impl ConstantPoolEntry { fn weighted_count(target: u16) -> impl FnMut(&Self) -> bool { let mut count = 0; move |entry: &Self| { count += match entry { - CpInfo::Long(_) | CpInfo::Double(_) => 2, + ConstantPoolEntry::Long(_) | ConstantPoolEntry::Double(_) => 2, _ => 1, }; count >= target as usize @@ -116,8 +118,8 @@ pub struct MethodInfo { #[deku(ctx = "_endian: deku::ctx::Endian", endian = "big")] pub struct ConstantUtf8Info { pub length: u16, - #[deku(count="length")] - pub bytes: Vec + #[deku(count = "length")] + pub bytes: Vec, } #[derive(Clone, PartialEq, Debug, DekuRead)] @@ -147,6 +149,14 @@ pub struct ConstantClassInfo { pub name_index: u16, } +impl Deref for ConstantClassInfo { + type Target = u16; + + fn deref(&self) -> &Self::Target { + &self.name_index + } +} + #[derive(Clone, PartialEq, Debug, DekuRead)] #[deku(ctx = "_endian: deku::ctx::Endian", endian = "big")] pub struct 
ConstantNameAndTypeInfo { @@ -203,14 +213,19 @@ pub struct ConstantPackageInfo { impl fmt::Display for ClassFile { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Class File Information:")?; - writeln!(f, " Version: {}.{}", self.major_version, self.minor_version)?; + writeln!( + f, + " Version: {}.{}", + self.major_version, self.minor_version + )?; writeln!(f, " Access Flags: 0x{:04X}", self.access_flags)?; writeln!(f, " This Class: #{}", self.this_class)?; writeln!(f, " Super Class: #{}", self.super_class)?; writeln!(f, "\nConstant Pool ({} entries):", self.constant_pool.len())?; for (i, entry) in self.constant_pool().iter().enumerate() { - if let Some(entry_value) = entry - { writeln!(f, " #{}: {}", i, entry_value)?; } + if let Some(entry_value) = entry { + writeln!(f, " #{}: {}", i, entry_value)?; + } } writeln!(f, "\nInterfaces ({}):", self.interfaces.len())?; for interface in &self.interfaces { @@ -218,103 +233,155 @@ impl fmt::Display for ClassFile { } writeln!(f, "\nFields ({}):", self.fields.len())?; for (i, field) in self.fields.iter().enumerate() { - let string_name = self.get_string(field.name_index).unwrap(); + let string_name = &self.constant_pool.get_string(field.name_index).unwrap(); writeln!(f, " [{}:{}] {}", i, string_name, field)?; } writeln!(f, "\nMethods ({}):", self.methods.len())?; for (i, method) in self.methods.iter().enumerate() { - let string_name = self.get_string(method.name_index).unwrap(); + let string_name = self.constant_pool.get_string(method.name_index).unwrap(); writeln!(f, " [{}:{}] {}", i, string_name, method)?; for attribute in &method.attributes { write!(f, " ")?; - self.format_attribute(f, attribute).expect("TODO: panic message"); + self.format_attribute(f, attribute) + .expect("TODO: panic message"); // writeln!(f, " {}", attribute.get(self).unwrap())? 
} } writeln!(f, "\nAttributes ({}):", self.attributes.len())?; for (i, attr) in self.attributes.iter().enumerate() { - writeln!(f, " [{}] name_index=#{}, length={}::: {:?}", i, attr.attribute_name_index, attr.attribute_length, attr.get(self).unwrap())?; + writeln!( + f, + " [{}] name_index=#{}, length={}::: {:?}", + i, + attr.attribute_name_index, + attr.attribute_length, + attr.get(self).unwrap() + )?; } Ok(()) } - - } -impl fmt::Display for CpInfo { +impl fmt::Display for ConstantPoolEntry { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - CpInfo::Utf8(info) => { + ConstantPoolEntry::Utf8(info) => { let s = String::from_utf8_lossy(&info.bytes); write!(f, "Utf8 \"{}\"", s) } - CpInfo::Integer(val) => write!(f, "Integer {}", val), - CpInfo::Float(val) => write!(f, "Float {}", val), - CpInfo::Long(val) => write!(f, "Long {}", val), - CpInfo::Double(val) => write!(f, "Double {}", val), - CpInfo::Class(info) => write!(f, "Class #{}", info.name_index), - CpInfo::String(info) => write!(f, "String #{}", info.string_index), - CpInfo::FieldRef(info) => write!(f, "FieldRef #{}.#{}", info.class_index, info.name_and_type_index), - CpInfo::MethodRef(info) => write!(f, "MethodRef #{}.#{}", info.class_index, info.name_and_type_index), - CpInfo::InterfaceMethodRef(info) => write!(f, "InterfaceMethodRef #{}.#{}", info.class_index, info.name_and_type_index), - CpInfo::NameAndType(info) => write!(f, "NameAndType #{}:#{}", info.name_index, info.descriptor_index), - CpInfo::MethodHandle(info) => write!(f, "MethodHandle kind={} #{}", info.reference_kind, info.reference_index), - CpInfo::MethodType(info) => write!(f, "MethodType #{}", info.descriptor_index), - CpInfo::Dynamic(info) => write!(f, "Dynamic #{}.#{}", info.bootstrap_method_attr_index, info.name_and_type_index), - CpInfo::InvokeDynamic(info) => write!(f, "InvokeDynamic #{}.#{}", info.bootstrap_method_attr_index, info.name_and_type_index), - CpInfo::Module(info) => write!(f, "Module #{}", 
info.name_index), - CpInfo::Package(info) => write!(f, "Package #{}", info.name_index), + ConstantPoolEntry::Integer(val) => write!(f, "Integer {}", val), + ConstantPoolEntry::Float(val) => write!(f, "Float {}", val), + ConstantPoolEntry::Long(val) => write!(f, "Long {}", val), + ConstantPoolEntry::Double(val) => write!(f, "Double {}", val), + ConstantPoolEntry::Class(info) => write!(f, "Class #{}", info.name_index), + ConstantPoolEntry::String(info) => write!(f, "String #{}", info.string_index), + ConstantPoolEntry::FieldRef(info) => write!( + f, + "FieldRef #{}.#{}", + info.class_index, info.name_and_type_index + ), + ConstantPoolEntry::MethodRef(info) => write!( + f, + "MethodRef #{}.#{}", + info.class_index, info.name_and_type_index + ), + ConstantPoolEntry::InterfaceMethodRef(info) => write!( + f, + "InterfaceMethodRef #{}.#{}", + info.class_index, info.name_and_type_index + ), + ConstantPoolEntry::NameAndType(info) => write!( + f, + "NameAndType #{}:#{}", + info.name_index, info.descriptor_index + ), + ConstantPoolEntry::MethodHandle(info) => write!( + f, + "MethodHandle kind={} #{}", + info.reference_kind, info.reference_index + ), + ConstantPoolEntry::MethodType(info) => { + write!(f, "MethodType #{}", info.descriptor_index) + } + ConstantPoolEntry::Dynamic(info) => write!( + f, + "Dynamic #{}.#{}", + info.bootstrap_method_attr_index, info.name_and_type_index + ), + ConstantPoolEntry::InvokeDynamic(info) => write!( + f, + "InvokeDynamic #{}.#{}", + info.bootstrap_method_attr_index, info.name_and_type_index + ), + ConstantPoolEntry::Module(info) => write!(f, "Module #{}", info.name_index), + ConstantPoolEntry::Package(info) => write!(f, "Package #{}", info.name_index), } } } impl fmt::Display for FieldInfo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "flags=0x{:04X}, name=#{}, descriptor=#{}, attrs={}", - self.access_flags, self.name_index, self.descriptor_index, self.attributes.len()) + write!( + f, + "flags=0x{:04X}, name=#{}, 
descriptor=#{}, attrs={}", + self.access_flags, + self.name_index, + self.descriptor_index, + self.attributes.len() + ) } } impl fmt::Display for MethodInfo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let attrs: Vec<_> = self.attributes.iter().map(|x| { x.attribute_name_index }).collect(); - write!(f, "flags=0x{:04X}, name=#{}, descriptor=#{}, attrs={}:{:?}", - self.access_flags, self.name_index, self.descriptor_index, self.attributes.len(), attrs) + let attrs: Vec<_> = self + .attributes + .iter() + .map(|x| x.attribute_name_index) + .collect(); + write!( + f, + "flags=0x{:04X}, name=#{}, descriptor=#{}, attrs={}:{:?}", + self.access_flags, + self.name_index, + self.descriptor_index, + self.attributes.len(), + attrs + ) } } impl ClassFile { - /// Parse with interpreted attributes - pub fn from_bytes_interpreted(input: (&[u8], usize)) -> Result<((&[u8], usize), Self), DekuError> { - let (rest, mut class_file) = Self::from_bytes(input)?; - - // Interpret all attributes in-place - for field in &mut class_file.fields { - for attr in &mut field.attributes { - attr.interpreted = attr.parse_attribute(&class_file.constant_pool); - } - } - - for method in &mut class_file.methods { - for attr in &mut method.attributes { - attr.interpreted = attr.parse_attribute(&class_file.constant_pool); - } - } - - for attr in &mut class_file.attributes { - attr.interpreted = attr.parse_attribute(&class_file.constant_pool); - } - - Ok((rest, class_file)) - } - pub fn constant_pool(&self) -> Vec> { + // pub fn from_bytes_interpreted(input: (&[u8], usize)) -> Result<((&[u8], usize), Self), DekuError> { + // let (rest, mut class_file) = Self::from_bytes(input)?; + // + // // Interpret all attributes in-place + // for field in &mut class_file.fields { + // for attr in &mut field.attributes { + // attr.interpreted = attr.parse_attribute(&class_file.constant_pool); + // } + // } + // + // for method in &mut class_file.methods { + // for attr in &mut method.attributes { + // 
attr.interpreted = attr.parse_attribute(&class_file.constant_pool); + // } + // } + // + // for attr in &mut class_file.attributes { + // attr.interpreted = attr.parse_attribute(&class_file.constant_pool); + // } + // + // Ok((rest, class_file)) + // } + pub fn constant_pool(&self) -> Vec> { let mut expanded = vec![None]; // Index 0 is unused in JVM for entry in self.constant_pool.as_ref() { expanded.push(Some(entry.clone())); match entry { - CpInfo::Long(_) | CpInfo::Double(_) => { + ConstantPoolEntry::Long(_) | ConstantPoolEntry::Double(_) => { expanded.push(None); // Phantom entry } _ => {} @@ -324,32 +391,10 @@ impl ClassFile { expanded } - pub fn get_constant(&self, index: u16) -> Option<&CpInfo> { - // More efficient: calculate actual index - /*let mut current_index = 1u16; - for entry in &self.constant_pool { - if current_index == index { - return Some(entry); - } - current_index += match entry { - CpInfo::Long(_) | CpInfo::Double(_) => 2, - _ => 1, - }; - } - None*/ - self.constant_pool.get_constant(index) - } - - pub fn get_string(&self, index: u16) -> Result { - self.constant_pool.get_string(index) - // if let Some(CpInfo::Utf8(utf)) = self.get_constant(index) { - // return Some(String::from_utf8_lossy(&utf.bytes)); - // } - // None - } - fn format_attribute(&self, f: &mut fmt::Formatter<'_>, attr: &AttributeInfo) -> fmt::Result { - let attribute = attr.get(self).unwrap_or_else(|| panic!("Failed to parse attribute {}", attr)); + let attribute = attr + .get(self) + .unwrap_or_else(|| panic!("Failed to parse attribute {}", attr)); match &attribute { Attribute::Code(code_attr) => { writeln!(f, " {}", attribute)?; @@ -389,7 +434,7 @@ impl ClassFile { } } - pub fn get_code(&self, method_ref_data: MethodData) -> Result { + /*pub fn get_code(&self, method_ref_data: MethodRef) -> Result { for info in self.methods.iter() { let data = self.constant_pool.resolve_method_info(info)?; let is_same_method_name = data.name.eq(&method_ref_data.name); @@ -403,8 +448,8 @@ 
impl ClassFile { } } } - Err(()) - } + Err("Failed to find bytecode for method".to_string().into()) + }*/ // pub fn get_static_field_value(&self, field_ref: &FieldData) -> Value { // for info in self.fields.iter() { @@ -418,7 +463,10 @@ impl ClassFile { // } } -pub fn pool_get_constant(constant_pool: &[CpInfo], index: u16) -> Option<&CpInfo> { +pub fn pool_get_constant( + constant_pool: &[ConstantPoolEntry], + index: u16, +) -> Option<&ConstantPoolEntry> { // More efficient: calculate actual index let mut current_index = 1u16; for entry in constant_pool { @@ -426,15 +474,15 @@ pub fn pool_get_constant(constant_pool: &[CpInfo], index: u16) -> Option<&CpInfo return Some(entry); } current_index += match entry { - CpInfo::Long(_) | CpInfo::Double(_) => 2, + ConstantPoolEntry::Long(_) | ConstantPoolEntry::Double(_) => 2, _ => 1, }; } None } -pub fn pool_get_string(constant_pool: &[CpInfo], index: u16) -> Option> { - if let Some(CpInfo::Utf8(utf)) = pool_get_constant(constant_pool, index) { +pub fn pool_get_string(constant_pool: &[ConstantPoolEntry], index: u16) -> Option> { + if let Some(ConstantPoolEntry::Utf8(utf)) = pool_get_constant(constant_pool, index) { return Some(String::from_utf8_lossy(&utf.bytes)); } None @@ -448,140 +496,163 @@ pub(crate) struct Bytecode { pub code: Vec, } - - -pub trait ConstantPoolExt { - fn get_constant(&self, index: u16) -> Option<&CpInfo>; - fn get_string(&self, index: u16) -> Result; - fn get_field(&self, index: u16) -> Result<&ConstantFieldrefInfo, ()>; - fn get_class(&self, index: u16) -> Result<&ConstantClassInfo, ()>; - fn get_name_and_type(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, ()>; - fn resolve_field(&self, index: u16) -> Result; - fn resolve_method_ref(&self, index: u16) -> Result; - fn resolve_method_info(&self, method: &MethodInfo) -> Result; - fn resolve_field_info(&self, field: &FieldInfo) -> Result; -} - -impl ConstantPoolExt for [CpInfo] { - fn get_constant(&self, index: u16) -> Option<&CpInfo> { - let mut 
current_index = 1u16; - for entry in self { - if current_index == index { - return Some(entry); - } - current_index += match entry { - CpInfo::Long(_) | CpInfo::Double(_) => 2, - _ => 1, - }; - } - None - } - - fn get_string(&self, index: u16) -> Result { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::Utf8(data) => { - String::from_utf8(data.bytes.clone()).map_err(|e| ()) - }, - _ => Err(()), - } - } - - fn get_field(&self, index: u16) -> Result<&ConstantFieldrefInfo, ()> { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::FieldRef(data) => Ok(data), - _ => Err(()), - } - } - - fn get_class(&self, index: u16) -> Result<&ConstantClassInfo, ()> { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::Class(data) => Ok(data), - _ => Err(()), - } - } - fn get_name_and_type(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, ()> { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::NameAndType(data) => Ok(data), - _ => Err(()), - } - } - - fn resolve_field(&self, index: u16) -> Result { - if let Some(CpInfo::FieldRef(fr)) = self.get_constant(index) { - let class = self.get_class(fr.class_index)?; - let class = self.get_string(class.name_index)?; - let name_and_type = self.get_name_and_type(fr.name_and_type_index)?; - let name = self.get_string(name_and_type.name_index)?; - let desc = self.get_string(name_and_type.descriptor_index)?; - let desc = FieldType::parse(&desc)?; - Ok(FieldData { - class, - name, - desc, - }) - } else { Err(()) } - } - - fn resolve_method_ref(&self, index: u16) -> Result { - if let Some(CpInfo::MethodRef(mr)) = self.get_constant(index) { - let class = self.get_class(mr.class_index)?; - let class = self.get_string(class.name_index)?; - let name_and_type = self.get_name_and_type(mr.name_and_type_index)?; - let name = self.get_string(name_and_type.name_index)?; - let desc = 
self.get_string(name_and_type.descriptor_index)?; - let desc = MethodDescriptor::parse(&desc)?; - Ok(MethodData { - class, - name, - desc, - }) - } else { Err(()) } - } - - // (name, desc) - fn resolve_method_info(&self, method: &MethodInfo) -> Result { - let desc = self.get_string(method.descriptor_index)?; - let desc = MethodDescriptor::parse(&desc)?; - let name = self.get_string(method.name_index)?; - Ok(MethodData { - class: "".to_string(), - name, - desc, - }) - } - - fn resolve_field_info(&self, field: &FieldInfo) -> Result { - let desc = self.get_string(field.descriptor_index)?; - let desc = FieldType::parse(&desc)?; - let name = self.get_string(field.name_index)?; - Ok(FieldData { - class: "".to_string(), - name, - desc, - }) - } -} - +// pub trait ConstantPoolExt { +// fn get_constant(&self, index: u16) -> Result<&ConstantPoolEntry, ()>; +// fn get_string(&self, index: u16) -> Result; +// fn get_field(&self, index: u16) -> Result<&ConstantFieldrefInfo, ()>; +// fn get_class(&self, index: u16) -> Result<&ConstantClassInfo, ()>; +// fn get_name_and_type(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, ()>; +// fn resolve_field(&self, index: u16) -> Result; +// fn resolve_method_ref(&self, index: u16) -> Result; +// fn resolve_method_info(&self, method: &MethodInfo) -> Result; +// fn resolve_field_info(&self, field: &FieldInfo) -> Result; +// } +// +// impl ConstantPoolExt for [ConstantPoolEntry] { +// fn get_constant(&self, index: u16) -> Result<&ConstantPoolEntry, ()> { +// let mut current_index = 1u16; +// for entry in self { +// if current_index == index { +// return Ok(entry); +// } +// current_index += match entry { +// ConstantPoolEntry::Long(_) | ConstantPoolEntry::Double(_) => 2, +// _ => 1, +// }; +// } +// Err(()) +// } +// +// fn get_string(&self, index: u16) -> Result { +// let cp_entry = self.get_constant(index)?; +// match cp_entry { +// ConstantPoolEntry::Utf8(data) => { +// String::from_utf8(data.bytes.clone()).map_err(|e| ()) +// }, 
+// _ => Err(()), +// } +// } +// +// fn get_field(&self, index: u16) -> Result<&ConstantFieldrefInfo, ()> { +// let cp_entry = self.get_constant(index)?; +// match cp_entry { +// ConstantPoolEntry::FieldRef(data) => Ok(data), +// _ => Err(()), +// } +// } +// +// fn get_class(&self, index: u16) -> Result<&ConstantClassInfo, ()> { +// let cp_entry = self.get_constant(index)?; +// match cp_entry { +// ConstantPoolEntry::Class(data) => Ok(data), +// _ => Err(()), +// } +// } +// fn get_name_and_type(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, ()> { +// let cp_entry = self.get_constant(index)?; +// match cp_entry { +// ConstantPoolEntry::NameAndType(data) => Ok(data), +// _ => Err(()), +// } +// } +// +// fn resolve_field(&self, index: u16) -> Result { +// let fr = self.get_field(index)?; +// let class = self.get_class(fr.class_index)?; +// let class = self.get_string(class.name_index)?; +// let name_and_type = self.get_name_and_type(fr.name_and_type_index)?; +// let name = self.get_string(name_and_type.name_index)?; +// let desc = self.get_string(name_and_type.descriptor_index)?; +// let desc = FieldType::parse(&desc)?; +// Ok(FieldData { +// class, +// name, +// desc, +// }) +// } +// +// fn resolve_method_ref(&self, index: u16) -> Result { +// if let ConstantPoolEntry::MethodRef(mr) = self.get_constant(index)? 
{ +// let class = self.get_class(mr.class_index)?; +// let class = self.get_string(class.name_index)?; +// let name_and_type = self.get_name_and_type(mr.name_and_type_index)?; +// let name = self.get_string(name_and_type.name_index)?; +// let desc = self.get_string(name_and_type.descriptor_index)?; +// let desc = MethodDescriptor::parse(&desc)?; +// Ok(MethodData { +// class, +// name, +// desc, +// code: None, +// }) +// } else { Err(()) } +// } +// +// // (name, desc) +// fn resolve_method_info(&self, method: &MethodInfo) -> Result { +// let desc = self.get_string(method.descriptor_index)?; +// let desc = MethodDescriptor::parse(&desc)?; +// let name = self.get_string(method.name_index)?; +// Ok(MethodData { +// class: "".to_string(), +// name, +// desc, +// code: None, +// }) +// } +// +// fn resolve_field_info(&self, field: &FieldInfo) -> Result { +// let desc = self.get_string(field.descriptor_index)?; +// let desc = FieldType::parse(&desc)?; +// let name = self.get_string(field.name_index)?; +// Ok(FieldData { +// class: "".to_string(), +// name, +// desc, +// }) +// } +// } #[derive(Debug)] -pub struct MethodData { +pub struct MethodRef { pub class: String, pub name: String, pub desc: MethodDescriptor, - pub code: Option +} +#[derive(Debug, Clone)] +pub struct MethodData { + pub name: String, + pub desc: MethodDescriptor, + pub code: Option, + pub flags: MethodFlags, + // pub exceptions: Option<_>, + // pub visible_annotations: Option<_>, + // pub invisible_annotations: Option<_>, + // pub default_annotation: Option<_>, + // pub method_parameters: Option<_> } - - #[derive(Debug)] -pub struct FieldData { +pub struct FieldRef { pub class: String, pub name: String, - pub desc: FieldType + pub desc: FieldType, +} + +pub struct FieldData { + pub name: String, + pub flags: FieldFlags, + pub desc: FieldType, + pub value: Option, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Constant { + Int(i32), + Long(i64), + Float(f32), + Double(f64), + String(String), } 
#[allow(non_snake_case)] @@ -589,23 +660,23 @@ pub struct FieldData { pub struct ClassFlags { // flags #[deku(bits = 1)] - MODULE: bool, + pub MODULE: bool, #[deku(bits = 1)] - ENUM: bool, + pub ENUM: bool, #[deku(bits = 1)] - ANNOTATION: bool, + pub ANNOTATION: bool, #[deku(bits = 1)] - SYNTHETIC: bool, + pub SYNTHETIC: bool, #[deku(bits = 1, pad_bits_before = "1")] - ABSTRACT: bool, + pub ABSTRACT: bool, #[deku(bits = 1)] - INTERFACE: bool, + pub INTERFACE: bool, #[deku(bits = 1, pad_bits_before = "3")] - SUPER: bool, + pub SUPER: bool, #[deku(bits = 1)] - FINAL: bool, + pub FINAL: bool, #[deku(bits = 1, pad_bits_before = "3")] - PUBLIC: bool, + pub PUBLIC: bool, } impl From for ClassFlags { @@ -615,8 +686,6 @@ impl From for ClassFlags { } } - - #[allow(non_snake_case)] #[derive(Debug, PartialEq, DekuRead, DekuWrite)] pub struct ModuleFlags { @@ -668,32 +737,32 @@ impl From for FieldFlags { } #[allow(non_snake_case)] -#[derive(Debug, PartialEq, DekuRead, DekuWrite)] +#[derive(Debug, PartialEq, DekuRead, DekuWrite, Clone)] pub struct MethodFlags { #[deku(bits = 1, pad_bits_before = "3")] - ACC_SYNTHETIC: bool, + pub ACC_SYNTHETIC: bool, #[deku(bits = 1)] - ACC_STRICT: bool, + pub ACC_STRICT: bool, #[deku(bits = 1)] - ACC_ABSTRACT: bool, + pub ACC_ABSTRACT: bool, #[deku(bits = 1, pad_bits_before = "1")] - ACC_NATIVE: bool, + pub ACC_NATIVE: bool, #[deku(bits = 1)] - ACC_VARARGS: bool, + pub ACC_VARARGS: bool, #[deku(bits = 1)] - ACC_BRIDGE: bool, + pub ACC_BRIDGE: bool, #[deku(bits = 1)] - ACC_SYNCHRONIZED: bool, + pub ACC_SYNCHRONIZED: bool, #[deku(bits = 1)] - ACC_FINAL: bool, + pub ACC_FINAL: bool, #[deku(bits = 1)] - ACC_STATIC: bool, + pub ACC_STATIC: bool, #[deku(bits = 1)] - ACC_PROTECTED: bool, + pub ACC_PROTECTED: bool, #[deku(bits = 1)] - ACC_PRIVATE: bool, + pub ACC_PRIVATE: bool, #[deku(bits = 1)] - ACC_PUBLIC: bool, + pub ACC_PUBLIC: bool, } impl From for MethodFlags { @@ -703,12 +772,11 @@ impl From for MethodFlags { } } - //yoinked because im monkled 
impl MethodDescriptor { /// Parses a method descriptor as specified in the JVM specs: /// https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.3.3 - pub fn parse(descriptor: &str) -> Result { + pub fn parse(descriptor: &str) -> Result { let mut chars = descriptor.chars(); match chars.next() { Some('(') => { @@ -720,17 +788,17 @@ impl MethodDescriptor { return_type, }) } else { - Err(()) + Err(DescParseError) } } - _ => Err(()), + _ => Err(DescParseError), } } fn parse_parameters( descriptor: &str, chars: &mut Chars, - ) -> Result, ()> { + ) -> Result, DescParseError> { let mut parameters = Vec::new(); loop { match chars.clone().next() { @@ -739,7 +807,7 @@ impl MethodDescriptor { let param = FieldType::parse_from(descriptor, chars)?; parameters.push(param); } - None => return Err(()), + None => return Err(DescParseError), } } } @@ -747,7 +815,7 @@ impl MethodDescriptor { fn parse_return_type( descriptor: &str, chars: &mut Chars, - ) -> Result, ()> { + ) -> Result, DescParseError> { match chars.clone().next() { Some('V') => Ok(None), Some(_) => { @@ -755,10 +823,10 @@ impl MethodDescriptor { if chars.next().is_none() { Ok(return_type) } else { - Err(()) + Err(DescParseError) } } - _ => Err(()), + _ => Err(DescParseError), } } @@ -766,24 +834,30 @@ impl MethodDescriptor { self.parameters.len() } } +#[derive(Debug)] +pub struct DescParseError; + +impl Display for DescParseError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "Failed to parse field descriptor") + } +} impl FieldType { - pub fn parse(type_descriptor: &str) -> Result { + pub fn parse(type_descriptor: &str) -> Result { let mut chars = type_descriptor.chars(); let descriptor = Self::parse_from(type_descriptor, &mut chars)?; match chars.next() { None => Ok(descriptor), - Some(_) => Err(()), + Some(_) => Err(DescParseError), } } pub(crate) fn parse_from( type_descriptor: &str, chars: &mut Chars, - ) -> Result { - let first_char = chars - .next() - .ok_or(())?; + ) -> 
Result { + let first_char = chars.next().ok_or(DescParseError)?; Ok(match first_char { 'B' => FieldType::Base(BaseType::Byte), @@ -798,14 +872,14 @@ impl FieldType { let class_name: String = chars.take_while_ref(|c| *c != ';').collect(); match chars.next() { Some(';') => FieldType::ClassType(class_name), - _ => return Err(()), + _ => return Err(DescParseError), } } '[' => { let component_type = Self::parse_from(type_descriptor, chars)?; FieldType::ArrayType(Box::new(component_type)) } - _ => return Err(()), + _ => return Err(DescParseError), }) } -} \ No newline at end of file +} diff --git a/src/class_file/constant_pool.rs b/src/class_file/constant_pool.rs index 8cac1b2..3215c32 100644 --- a/src/class_file/constant_pool.rs +++ b/src/class_file/constant_pool.rs @@ -1,117 +1,228 @@ +use std::fmt::{Display, Formatter}; use std::ops::Deref; -use crate::class_file::{ConstantClassInfo, ConstantFieldrefInfo, ConstantNameAndTypeInfo, ConstantPoolExt, CpInfo, FieldData, FieldInfo, MethodData, MethodInfo}; -use crate::{FieldType, MethodDescriptor}; +use std::str::FromStr; +use std::sync::Arc; +use deku::DekuContainerRead; +use log::trace; +use crate::class_file::{ConstantClassInfo, ConstantFieldrefInfo, ConstantNameAndTypeInfo, ConstantPoolEntry, FieldRef, FieldInfo, MethodRef, MethodInfo, ConstantUtf8Info, ConstantStringInfo, ConstantMethodrefInfo, ConstantInterfaceMethodrefInfo, ConstantMethodHandleInfo, ConstantMethodTypeInfo, ConstantDynamicInfo, ConstantInvokeDynamicInfo, ConstantModuleInfo, ConstantPackageInfo, DescParseError}; +use crate::{pool_get_impl, FieldType, MethodDescriptor, VmError}; +use crate::attributes::{Attribute, AttributeInfo, CodeAttribute, LineNumberTableAttribute, LocalVariableTableAttribute}; -struct ConstantPool<'a>(&'a [CpInfo]); +pub type ConstantPoolSlice = [ConstantPoolEntry]; +pub type ConstantPoolOwned = Vec; -impl Deref for ConstantPool<'_> { - type Target = [CpInfo]; +impl ConstantPoolExt for ConstantPoolSlice {} - fn deref(&self) -> 
&Self::Target { - self.0 - } +pub trait ConstantPoolExt: ConstantPoolGet { + // fn get_constant(&self, index: u16) -> Option<&ConstantPoolEntry> { + // let mut current_index = 1u16; + // for entry in self { + // if current_index == index { + // return Some(entry); + // } + // current_index += match entry { + // ConstantPoolEntry::Long(_) | ConstantPoolEntry::Double(_) => 2, + // _ => 1, + // }; + // } + // None + // } + // + fn get_string(&self, index: u16) -> Result { + let cp_entry = self.get_utf8_info(index)?; + + String::from_utf8(cp_entry.bytes.clone()).map_err(|e| { e.to_string().into() }) + } + // + // fn get_field(&self, index: u16) -> Result<&ConstantFieldrefInfo, ()> { + // let cp_entry = self.get_constant(index).ok_or(())?; + // match cp_entry { + // ConstantPoolEntry::FieldRef(data) => Ok(data), + // _ => Err(()), + // } + // } + // + // fn get_class(&self, index: u16) -> Result<&ConstantClassInfo, ()> { + // let cp_entry = self.get_constant(index).ok_or(())?; + // match cp_entry { + // ConstantPoolEntry::Class(data) => Ok(data), + // _ => Err(()), + // } + // } + // fn get_name_and_type(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, ()> { + // let cp_entry = self.get_constant(index).ok_or(())?; + // match cp_entry { + // ConstantPoolEntry::NameAndType(data) => Ok(data), + // _ => Err(()), + // } + // } + + fn resolve_field(&self, index: u16) -> Result { + let fr = self.get_field_ref(index)?; + let class = self.get_class_info(fr.class_index)?; + let class = self.get_string(class.name_index)?; + let name_and_type = self.get_name_and_type_info(fr.name_and_type_index)?; + let name = self.get_string(name_and_type.name_index)?; + let desc = self.get_string(name_and_type.descriptor_index)?; + let desc = FieldType::parse(&desc)?; + Ok(FieldRef { + class, + name, + desc, + }) + } + + fn resolve_method_ref(&self, index: u16) -> Result { + let mr = self.get_method_ref(index)?; + let class = self.get_class_info(mr.class_index)?; + let class = 
self.get_string(class.name_index)?; + let name_and_type = self.get_name_and_type_info(mr.name_and_type_index)?; + let name = self.get_string(name_and_type.name_index)?; + let desc = self.get_string(name_and_type.descriptor_index)?; + let desc = MethodDescriptor::parse(&desc)?; + Ok(MethodRef { + class, + name, + desc, + }) + } + + /*// (name, desc) + fn resolve_method_info(&self, method: &MethodInfo) -> Result { + let desc = self.get_string(method.descriptor_index)?; + let desc = MethodDescriptor::parse(&desc)?; + let name = self.get_string(method.name_index)?; + Ok(MethodData { + class: "".to_string(), + name, + desc, + code: None, + }) + }*/ + + fn resolve_field_info(&self, field: &FieldInfo) -> Result { + let desc = self.get_string(field.descriptor_index)?; + let desc = FieldType::parse(&desc)?; + let name = self.get_string(field.name_index)?; + Ok(FieldRef { + class: "".to_string(), + name, + desc, + }) + } + + + + fn parse_attribute(&self, a: AttributeInfo) -> Result { + let name = self.get_string(a.attribute_name_index)?; + trace!("Parsing attribute with name: {}", name); + + + match name.as_ref() { + "Code" => { + let (_, mut code_attr) = CodeAttribute::from_bytes((a.info.as_slice(), 0))?; + Ok(Attribute::Code(code_attr)) + } + "SourceFile" => { + let source_file_index = u16::from_be_bytes([a.info[0], a.info[1]]); + Ok(Attribute::SourceFile(source_file_index)) + } + "LineNumberTable" => { + let (_, lnt) = LineNumberTableAttribute::from_bytes((&a.info.as_slice(), 0))?; + Ok(Attribute::LineNumberTable(lnt)) + } + "StackMapTable" => { + Ok(Attribute::StackMapTable(a.info.clone())) + } + "Exceptions" => { + Ok(Attribute::Exceptions(a.info.clone())) + } + "InnerClasses" => { + Ok(Attribute::InnerClasses(a.info.clone())) + } + "Signature" => { + let signature_index = u16::from_be_bytes([a.info[0], a.info[1]]); + Ok(Attribute::Signature(signature_index)) + } + "LocalVariableTable" => { + let (_, lvt) = LocalVariableTableAttribute::from_bytes((&a.info.as_slice(), 
0))?; + Ok(Attribute::LocalVariableTable(lvt)) + } + _ => Ok(Attribute::Unknown(name.to_string(), a.info.clone())), + } + } } -impl ConstantPool<'_> { - fn get_constant(&self, index: u16) -> Option<&CpInfo> { - let mut current_index = 1u16; - for entry in self { - if current_index == index { - return Some(entry); - } - current_index += match entry { - CpInfo::Long(_) | CpInfo::Double(_) => 2, - _ => 1, - }; - } - None - } +// pub trait ConstantPoolGet { +// fn get_i32(&self, index: u16) -> Result<&i32, ConstantPoolError>; +// fn get_f32(&self, index: u16) -> Result<&f32, ConstantPoolError>; +// fn get_i64(&self, index: u16) -> Result<&i64, ConstantPoolError>; +// fn get_f64(&self, index: u16) -> Result<&f64, ConstantPoolError>; +// fn get_utf8_info(&self, index: u16) -> Result<&ConstantUtf8Info, ConstantPoolError>; +// fn get_class_info(&self, index: u16) -> Result<&ConstantClassInfo, ConstantPoolError>; +// fn get_string_info(&self, index: u16) -> Result<&ConstantStringInfo, ConstantPoolError>; +// fn get_field_ref(&self, index: u16) -> Result<&ConstantFieldrefInfo, ConstantPoolError>; +// fn get_method_ref(&self, index: u16) -> Result<&ConstantMethodrefInfo, ConstantPoolError>; +// fn get_interface_method_ref(&self, index: u16) -> Result<&ConstantInterfaceMethodrefInfo, ConstantPoolError>; +// fn get_name_and_type_info(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, ConstantPoolError>; +// fn get_method_handle_info(&self, index: u16) -> Result<&ConstantMethodHandleInfo, ConstantPoolError>; +// fn get_method_type_info(&self, index: u16) -> Result<&ConstantMethodTypeInfo, ConstantPoolError>; +// fn get_dynamic_info(&self, index: u16) -> Result<&ConstantDynamicInfo, ConstantPoolError>; +// fn get_invoke_dynamic_info(&self, index: u16) -> Result<&ConstantInvokeDynamicInfo, ConstantPoolError>; +// fn get_module_info(&self, index: u16) -> Result<&ConstantModuleInfo, ConstantPoolError>; +// fn get_package_info(&self, index: u16) -> Result<&ConstantPackageInfo, 
ConstantPoolError>; +// } - fn get_string(&self, index: u16) -> Result { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::Utf8(data) => { - String::from_utf8(data.bytes.clone()).map_err(|e| ()) - }, - _ => Err(()), - } - } +pub trait ConstantPoolGet: AsRef<[ConstantPoolEntry]> { + fn get_constant(&self, index: u16) -> Result<&ConstantPoolEntry, ConstantPoolError> { + let mut current_index = 1u16; + for entry in self.as_ref() { + if current_index == index { + return Ok(entry); + } + current_index += match entry { + ConstantPoolEntry::Long(_) | ConstantPoolEntry::Double(_) => 2, + _ => 1, + }; + } + Err("No constant pool entry at that index".to_string().into()) + } + pool_get_impl!(get_i32 => i32, Integer); + pool_get_impl!(get_f32 => f32, Float); + pool_get_impl!(get_i64 => i64, Long); + pool_get_impl!(get_f64 => f64, Double); + pool_get_impl!(get_utf8_info => ConstantUtf8Info, Utf8); + pool_get_impl!(get_class_info => ConstantClassInfo, Class); + pool_get_impl!(get_string_info => ConstantStringInfo, String); + pool_get_impl!(get_field_ref => ConstantFieldrefInfo, FieldRef); + pool_get_impl!(get_method_ref => ConstantMethodrefInfo, MethodRef); + pool_get_impl!(get_interface_method_ref => ConstantInterfaceMethodrefInfo, InterfaceMethodRef); + pool_get_impl!(get_name_and_type_info => ConstantNameAndTypeInfo, NameAndType); + pool_get_impl!(get_method_handle_info => ConstantMethodHandleInfo, MethodHandle); + pool_get_impl!(get_method_type_info => ConstantMethodTypeInfo, MethodType); + pool_get_impl!(get_dynamic_info => ConstantDynamicInfo, Dynamic); + pool_get_impl!(get_invoke_dynamic_info => ConstantInvokeDynamicInfo, InvokeDynamic); + pool_get_impl!(get_module_info => ConstantModuleInfo, Module); + pool_get_impl!(get_package_info => ConstantPackageInfo, Package); +} - fn get_field(&self, index: u16) -> Result<&ConstantFieldrefInfo, ()> { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::FieldRef(data) 
=> Ok(data), - _ => Err(()), - } - } +impl ConstantPoolGet for [ConstantPoolEntry] {} - fn get_class(&self, index: u16) -> Result<&ConstantClassInfo, ()> { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::Class(data) => Ok(data), - _ => Err(()), - } - } - fn get_name_and_type(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, ()> { - let cp_entry = self.get_constant(index).ok_or(())?; - match cp_entry { - CpInfo::NameAndType(data) => Ok(data), - _ => Err(()), - } - } +impl Display for ConstantPoolError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} +#[derive(Debug)] +pub struct ConstantPoolError(String); +impl From for ConstantPoolError { + fn from(value: String) -> Self { + Self(value) + } +} - fn resolve_field(&self, index: u16) -> Result { - if let Some(CpInfo::FieldRef(fr)) = self.get_constant(index) { - let class = self.get_class(fr.class_index)?; - let class = self.get_string(class.name_index)?; - let name_and_type = self.get_name_and_type(fr.name_and_type_index)?; - let name = self.get_string(name_and_type.name_index)?; - let desc = self.get_string(name_and_type.descriptor_index)?; - let desc = FieldType::parse(&desc)?; - Ok(FieldData { - class, - name, - desc, - }) - } else { Err(()) } - } - - fn resolve_method_ref(&self, index: u16) -> Result { - if let Some(CpInfo::MethodRef(mr)) = self.get_constant(index) { - let class = self.get_class(mr.class_index)?; - let class = self.get_string(class.name_index)?; - let name_and_type = self.get_name_and_type(mr.name_and_type_index)?; - let name = self.get_string(name_and_type.name_index)?; - let desc = self.get_string(name_and_type.descriptor_index)?; - let desc = MethodDescriptor::parse(&desc)?; - Ok(MethodData { - class, - name, - desc, - }) - } else { Err(()) } - } - - // (name, desc) - fn resolve_method_info(&self, method: &MethodInfo) -> Result { - let desc = self.get_string(method.descriptor_index)?; - let desc = 
MethodDescriptor::parse(&desc)?; - let name = self.get_string(method.name_index)?; - Ok(MethodData { - class: "".to_string(), - name, - desc, - }) - } - - fn resolve_field_info(&self, field: &FieldInfo) -> Result { - let desc = self.get_string(field.descriptor_index)?; - let desc = FieldType::parse(&desc)?; - let name = self.get_string(field.name_index)?; - Ok(FieldData { - class: "".to_string(), - name, - desc, - }) - } -} \ No newline at end of file +impl From for ConstantPoolError { + fn from(value: DescParseError) -> Self { + value.to_string().into() + } +} diff --git a/src/class_loader.rs b/src/class_loader.rs index 32a143c..618bcc7 100644 --- a/src/class_loader.rs +++ b/src/class_loader.rs @@ -4,11 +4,14 @@ use std::fs::File; use std::io::Read; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; +use deku::DekuContainerRead; use log::warn; +use crate::attributes::Attribute; use crate::bimage::Bimage; use crate::class::RuntimeClass; -use crate::class_file::{ClassFile, ClassFlags, ConstantClassInfo, ConstantPoolExt, CpInfo}; - +use crate::class_file::{ClassFile, ClassFlags, ConstantClassInfo, ConstantPoolEntry, FieldData, FieldFlags, MethodData, MethodFlags}; +use crate::class_file::constant_pool::{ConstantPoolExt, ConstantPoolGet}; +use crate::{FieldType, MethodDescriptor}; pub type LoaderRef = Arc>; @@ -74,8 +77,9 @@ pub fn resolve_path(what: &str) -> Result<(PathBuf, String), String> { /// ``` #[derive(Default)] pub struct ClassLoader { - classes: HashMap>, + classes: HashMap>, bimage: Bimage, + pub needs_init: Vec> } impl ClassLoader { @@ -119,12 +123,12 @@ impl ClassLoader { /// } /// } /// ``` - pub fn get_or_load(&mut self, what: &str) -> Result, String> { + pub fn get_or_load(&mut self, what: &str) -> Result, String> { if let Some(class) = self.classes.get(what) { return Ok(class.clone()); } let class = self.load_class(what)?; - self.classes.insert(what.to_string(), class.clone()); + self.needs_init.push(class.clone()); Ok(class) } @@ -132,49 
+136,121 @@ impl ClassLoader { self.classes.clone() }*/ - fn load_class(&mut self, what: &str) -> Result, String> { - let (module, class_fqn) = what.split_once("/").unwrap_or(("", what)); - let bytes = self.bimage.get_class(module, class_fqn); - let (_, cf) = ClassFile::from_bytes_interpreted((bytes.as_ref(), 0)) + fn load_class(&mut self, what: &str) -> Result, String> { + let (module, class_fqn) = ("", what); + let bytes = self.bimage.get_class(module, class_fqn).unwrap_or_else(|| { + let path = format!("./data/{what}.class"); + println!("{}", path); + let mut class_file = File::open(path).unwrap(); + let mut bytes = Vec::new(); + class_file.read_to_end(&mut bytes).unwrap(); + bytes + }); + let (_, cf) = ClassFile::from_bytes((bytes.as_ref(), 0)) .map_err(|e| format!("failed to parse class file: {}", e))?; - let arced = Arc::new(cf); + let runtime = self.runtime_class(cf); + let arced = Arc::new(runtime); let option = self.classes.insert(class_fqn.to_string(), arced.clone()); if option.is_some() { warn!("Replaced loaded class: {}", class_fqn) } Ok(arced) } - - - - fn runtime_class(&self, class_file: ClassFile) -> RuntimeClass { + fn runtime_class(&mut self, class_file: ClassFile) -> RuntimeClass { let constant_pool = class_file.constant_pool.clone(); let access_flags = ClassFlags::from(class_file.access_flags); let this_class = { - let this_class_info = class_file.constant_pool.get_constant(class_file.this_class) - if let Some(CpInfo::Class(class_info)) = this_class_info { - class_file.constant_pool.get_string(class_info.name_index) + let cl = class_file.constant_pool.get_class_info(class_file.this_class).unwrap(); + let name = class_file.constant_pool.get_string(cl.name_index).unwrap(); + name + }; + let super_class = { + if (this_class.eq("java/lang/Object")) + { + debug_assert_eq!(this_class, "java/lang/Object"); + debug_assert_eq!(class_file.super_class, 0u16); + None + } else { + debug_assert_ne!(class_file.super_class, 0u16); + let super_info = 
constant_pool.get_class_info(class_file.super_class).unwrap(); + let name = constant_pool.get_string(**super_info).unwrap(); + Some(self.get_or_load(&*name).unwrap()) } + }; + if let Some(super_cl) = super_class.clone() { + let super_is_object = super_cl.super_class.is_none(); - + if access_flags.INTERFACE { + debug_assert!(super_is_object); + } } + let interfaces = class_file + .interfaces.iter().copied() + .map(|e| { + let interface_info = constant_pool.get_class_info(e).unwrap(); + let name = constant_pool.get_string(interface_info.name_index).unwrap(); + self.get_or_load(&name).unwrap() + }).collect::>(); + + let fields = class_file + .fields.iter() + .map(|e| { + let name = constant_pool.get_string(e.name_index).unwrap(); + let flags = FieldFlags::from(e.access_flags); + let desc = constant_pool.get_string(e.descriptor_index).map(|e|{ + FieldType::parse(&e) + }).unwrap().unwrap(); + let value = e.attributes.first() + .and_then(|x| { + if let Attribute::ConstantValue(val) = constant_pool.parse_attribute(x.clone()).unwrap() { + Some(val) + } else { + None + } + }); + FieldData { + name, + flags, + desc, + value, + } + }).collect::>(); - - - + let methods = class_file.methods.iter().map(|e| { + let name = constant_pool.get_string(e.name_index).unwrap(); + let flags = MethodFlags::from(e.access_flags); + let desc = constant_pool.get_string(e.descriptor_index).map(|e|{ + MethodDescriptor::parse(&e) + }).unwrap().unwrap(); + let code = e.attributes.first() + .and_then(|x| { + if let Attribute::Code(val) = constant_pool.parse_attribute(x.clone()).unwrap() { + Some(val) + } else { + None + } + }); + MethodData { + name, + flags, + desc, + code, + } + }).collect::>(); RuntimeClass { constant_pool, access_flags, - this_class: "".to_string(), - super_class: Arc::new(RuntimeClass {}), - interfaces: vec![], - fields: vec![], - methods: vec![], + this_class, + super_class, + interfaces, + fields, + methods, + init_state: Mutex::new(crate::class::InitState::NotInitialized), 
} } } diff --git a/src/lib.rs b/src/lib.rs index a259c53..02c060c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,33 +1,50 @@ -use std::cell::RefCell; -use std::io::Read; -use std::fmt::{Debug, Display, Formatter}; -use std::fs::File; -use std::ops::Deref; -use std::rc::Rc; -use std::sync::{Arc, Mutex}; -use deku::DekuContainerRead; -use deku_derive::{DekuRead, DekuWrite}; -use itertools::Itertools; -use vm::Vm; +//! A simple JVM implementation in Rust that aims to run Java class files. +//! +//! # Overview +//! This crate provides functionality to: +//! - Load and parse Java class files +//! - Execute JVM bytecode instructions +//! - Manage class loading and object creation +//! - Handle method invocation and stack frames +//! +//! # Core Types +//! - [`Frame`] - Represents a JVM stack frame for method execution +//! - [`Value`] - Represents JVM runtime values/primitives + +//! - [`BaseType`] - JVM primitive types +//! - [`MethodDescriptor`] - Method signature information +//! - [`FieldType`] - Field type information + use crate::attributes::{Attribute, CodeAttribute, Ops}; -use crate::class_file::{Bytecode, ClassFile, ClassFlags, ConstantPoolExt, CpInfo, FieldFlags, MethodFlags}; +use crate::class_file::constant_pool::ConstantPoolExt; +use crate::class_file::constant_pool::{ConstantPoolError, ConstantPoolGet}; +use crate::class_file::{Bytecode, ClassFile, ConstantPoolEntry, MethodData}; use crate::object::Object; use crate::thread::VmThread; +use deku::{DekuContainerRead, DekuError}; +use deku_derive::{DekuRead, DekuWrite}; +use log::warn; +use std::fmt::{Debug, Display, Formatter}; +use std::fs::File; +use std::io::Read; +use std::sync::{Arc, Mutex}; +use vm::Vm; mod attributes; +mod bimage; +mod class; mod class_file; mod class_loader; mod macros; -mod bimage; -mod vm; mod object; mod rng; mod thread; -mod class; +mod vm; const NULL: Value = Value::Reference(None); -include!(concat!(env!("OUT_DIR"), "/bindings.rs")); +// include!(concat!(env!("OUT_DIR"), 
"/bindings.rs")); +/// pseudo main pub fn run() { env_logger::init(); // let mut cl = ClassLoader::new().unwrap(); @@ -42,13 +59,17 @@ pub fn run() { class_file.read_to_end(&mut bytes).unwrap(); let (_rest, clazz) = ClassFile::from_bytes((bytes.as_ref(), 0)).unwrap(); let method = clazz.methods.iter().nth(1).unwrap().clone(); - let code = method.attributes.iter().find_map(|x| { - if let Some(Attribute::Code(code_attr)) = &x.get(&clazz) { - Some(code_attr.clone()) - } else { - None - } - }).unwrap(); + let code = method + .attributes + .iter() + .find_map(|x| { + if let Some(Attribute::Code(code_attr)) = &x.get(&clazz) { + Some(code_attr.clone()) + } else { + None + } + }) + .unwrap(); // let frame = Frame::new(); // println!("{}", code); let mut buf = Vec::new(); @@ -56,85 +77,101 @@ pub fn run() { buf.extend_from_slice(&bytes); buf.extend_from_slice(&code.code.clone()); let (_rest, ops) = Bytecode::from_bytes((buf.as_ref(), 0)).unwrap(); - let var_table = code.attributes.iter().find_map(|x| { - if let Some(Attribute::LocalVariableTable(varTableAttr)) = &x.get(&clazz) { - Some(varTableAttr.clone()) - } else { - None - } - }).unwrap(); + let var_table = code + .attributes + .iter() + .find_map(|x| { + if let Some(Attribute::LocalVariableTable(varTableAttr)) = &x.get(&clazz) { + Some(varTableAttr.clone()) + } else { + None + } + }) + .unwrap(); println!("{}", clazz); - let pool = clazz.constant_pool; - let mut vm = Arc::new(Vm::new()); - let mut frame = Frame::new(code, pool, Default::default(), vm); - // println!("{:?}", frame); - frame.execute(); + // let pool = clazz.constant_pool; + let mut vm = Vm::new("org/example/App"); // println!("{:?}", ops); // println!("{:?}", var_table.local_variable_table); // vm.method(ops.clone(), code, var_table); } - - -#[derive(Debug)] -pub struct KlassField { - pub name: String, - pub field_flags: FieldFlags, - pub descriptor: FieldType, -} -#[derive(Debug)] -pub struct KlassMethod { - pub name: String, - pub method_flags: 
MethodFlags, - pub method_descriptor: MethodDescriptor, - pub code_attribute: CodeAttribute -} - +/// A reference-counted, thread-safe pointer to an Object. type ObjectRef = Arc>; -#[derive(Debug)] -#[derive(Clone)] + +/// Represents a JVM runtime value. +/// +/// This enum covers all primitive types and object references that can exist +/// on the operand stack or in local variables during bytecode execution. +#[derive(Debug, Clone)] enum Value { + /// Boolean value (true/false) Boolean(bool), + /// Unicode character Char(char), + /// 32-bit floating point Float(f32), + /// 64-bit floating point Double(f64), + /// Signed 8-bit integer Byte(i8), + /// Signed 16-bit integer Short(i16), + /// Signed 32-bit integer Int(i32), + /// Signed 64-bit integer Long(i64), + /// Reference to an object (or null) Reference(Option), } - +/// Represents a JVM stack frame for method execution. +/// +/// A frame contains all the execution state needed to run a single method: +/// - Program counter (PC) tracking the current bytecode instruction +/// - Operand stack for intermediate values during computation +/// - Local variables for method parameters and local vars +/// - Reference to the constant pool for the class +/// - The bytecode to execute +/// - Reference to the thread executing this frame struct Frame { - - // program counter + /// Program counter - index of the current bytecode instruction pc: u16, - // operand stack + /// Operand stack for intermediate values stack: Vec, - // local vars + /// Local variables (includes method parameters) vars: Vec, - // constant pool - pool: Arc>, + /// Constant pool from the class file + pool: Arc>, + /// The bytecode instructions for this method bytecode: Bytecode, - thread: Arc + /// The thread executing this frame + thread: Arc, } impl Display for Frame { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "PC: {}\nStack: {:?}\nVars: {:?}", self.pc, self.stack, self.vars) + write!( + f, + "PC: {}\nStack: {:?}\nVars: 
{:?}", + self.pc, self.stack, self.vars + ) } } // println!("State:\n\tStack: {:?}\n\tLocals :{:?}\n", self.stack, self.vars) } - impl Frame { fn load_constant(index: u8) {} - fn new(code_attr: CodeAttribute, pool: Arc>, mut locals: Vec, thread: Arc) -> Self { + fn new( + code_attr: CodeAttribute, + pool: Arc>, + mut locals: Vec, + thread: Arc, + ) -> Self { let max_stack = code_attr.max_stack as usize; let max_local = code_attr.max_locals as usize; let bytes = code_attr.code_length.to_be_bytes(); @@ -142,7 +179,7 @@ impl Frame { buf.extend_from_slice(&bytes); buf.extend_from_slice(&code_attr.code.clone()); let (_rest, bytecode) = Bytecode::from_bytes((buf.as_ref(), 0)).unwrap(); - let extend = vec![Value::Reference(None); max_local-locals.len()]; + let extend = vec![Value::Reference(None); max_local - locals.len()]; locals.extend_from_slice(&extend); Frame { pc: 0, @@ -150,29 +187,48 @@ impl Frame { vars: locals, pool, bytecode, - vm, + thread, } } - fn execute(&mut self) { + fn execute(&mut self) -> Result, VmError> { let binding = self.bytecode.code.clone(); let mut ops = binding.iter(); - while let Some(op) = ops.next() { + for op in ops { println!("Executing Op: {:?}", op); let result = self.execute_instruction(op); match result { - Ok(_) => { println!("State:\n\tStack: {:?}\n\tLocals :{:?}\n", self.stack, self.vars) } - Err(_) => {panic!("Mission failed, we'll get em next time")} + Ok(ExecutionResult::Return(c)) => return Ok(None), + Ok(ExecutionResult::ReturnValue(val)) => return Ok(Some(val)), + Ok(_) => { + println!( + "State:\n\tStack: {:?}\n\tLocals :{:?}\n", + self.stack, self.vars + ) + } + Err(_) => { + panic!("Mission failed, we'll get em next time") + } } - - } - () + Err(VmError::ExecutionError) } } +/// Represents JVM primitive types used in field and method descriptors. 
+/// +/// Each variant corresponds to a single-character type code used in the JVM: +/// - B: byte +/// - C: char +/// - D: double +/// - F: float +/// - I: int +/// - J: long +/// - S: short +/// - Z: boolean #[derive(Debug, PartialEq, DekuRead, DekuWrite)] #[deku(id_type = "u8")] #[deku(seek_from_current = "-1")] +#[derive(Clone)] pub enum BaseType { /// B #[deku(id = "0x42")] @@ -211,72 +267,145 @@ impl From for BaseType { 'J' => BaseType::Long, 'S' => BaseType::Short, 'Z' => BaseType::Boolean, - _ => panic!("Invalid base type: {}", value) + _ => panic!("Invalid base type: {}", value), } } } -#[derive(Debug, PartialEq)] +/// Represents a parsed method descriptor that describes method parameters and return type. +/// +/// Method descriptors follow the format: `(ParamTypes...)ReturnType` +/// For example: +/// - `()V` - Takes no parameters and returns void +/// - `(II)I` - Takes two ints and returns an int +/// - `([Ljava/lang/String;)V` - Takes String array, returns void (public static void main) +#[derive(Debug, PartialEq, Clone)] pub struct MethodDescriptor { parameters: Vec, // none = void/v - return_type: Option + return_type: Option, } -#[derive(Debug, PartialEq)] + +impl MethodDescriptor { + fn void() -> Self { + Self { + parameters: vec![], + return_type: None, + } + } + fn psvm() -> Self { + MethodDescriptor::parse("([Ljava/lang/String;)V").unwrap() + } +} + +/// Represents types that can be used for fields in the JVM. +/// +/// Field types can be: +/// - Primitive types (represented by BaseType) +/// - Class types (prefixed with L) +/// - Array types (prefixed with [) +#[derive(Debug, PartialEq, Clone)] pub enum FieldType { + /// Represents a primitive type in the JVM, such as int, boolean, etc. + /// These are stored directly on the stack rather than as object references. Base(BaseType), - // L ClassName + /// Represents a reference to a class type, prefixed with 'L' in descriptors. + /// For example: "Ljava/lang/String;" represents a String reference. 
ClassType(String), - // [ ComponentType + /// Represents an array type, prefixed with '[' in descriptors. + /// The inner FieldType represents the component type of the array. ArrayType(Box), } enum ExecutionResult { Continue, Return(()), - ReturnValue(Value) + ReturnValue(Value), } +#[derive(Debug)] +enum VmError { + ConstantPoolError(String), + StackError(String), + DekuError(DekuError), + LoaderError(String), + ExecutionError, +} + +impl Display for VmError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + VmError::ConstantPoolError(msg) => write!(f, "Constant pool error: {}", msg), + VmError::StackError(msg) => write!(f, "Stack error: {}", msg), + VmError::DekuError(err) => write!(f, "Deku error: {}", err), + VmError::LoaderError(msg) => write!(f, "Loader error: {}", msg), + VmError::ExecutionError => write!(f, "Execution error"), + } + } +} + +impl From for VmError { + fn from(value: ConstantPoolError) -> Self { + Self::ConstantPoolError(value.to_string()) + } +} +impl From for VmError { + fn from(value: DekuError) -> Self { + Self::DekuError(value) + } +} + impl Frame { - fn execute_instruction(&mut self, op: &Ops) -> Result { + fn execute_instruction(&mut self, op: &Ops) -> Result { match op { Ops::ldc(index) => { - let thing = self.pool.get_constant(index.to_owned() as u16).ok_or(())?; + let thing = self.pool.get_constant(index.to_owned() as u16)?; println!("\tLoading constant: {}", thing); let resolved: Option = match thing { - CpInfo::Utf8(x) => { println!("{:?}", String::from_utf8(x.bytes.clone())); None } - CpInfo::Integer(x) => { Some(Value::Int(x.clone())) } - CpInfo::Float(x) => { Some(Value::Float(x.clone())) } - // CpInfo::Long(x) => { None } - // CpInfo::Double(x) => {None} - CpInfo::Class(x) => {None} - CpInfo::String(x) => { - // self.vm. 
+ ConstantPoolEntry::Utf8(x) => { + println!("{:?}", String::from_utf8(x.bytes.clone())); + warn!("Utf8 loading not yet implemented"); + None + } + ConstantPoolEntry::Integer(x) => Some(Value::Int(x.clone())), + ConstantPoolEntry::Float(x) => Some(Value::Float(x.clone())), + ConstantPoolEntry::Class(x) => None, + ConstantPoolEntry::String(x) => { + warn!("String loading not yet implemented"); + None + } + + ConstantPoolEntry::MethodHandle(x) => { + warn!("Method handle loading not yet implemented"); + None + } + ConstantPoolEntry::MethodType(x) => { + warn!("Method type loading not yet implemented"); + None + } + ConstantPoolEntry::Dynamic(x) => { + warn!("Dynamic loading not yet implemented"); + None + } + _ => { + panic!( + "Cannot load constant, is not of loadable type: {:?}. ", + thing + ); None } - // CpInfo::FieldRef(x) => {} - CpInfo::MethodRef(x) => {None} - // CpInfo::InterfaceMethodRef(x) => {} - // CpInfo::NameAndType(x) => {} - CpInfo::MethodHandle(x) => {None} - CpInfo::MethodType(x) => { None } - // CpInfo::Dynamic(x) => {} - // CpInfo::InvokeDynamic(x) => {} - // CpInfo::Module(x) => {} - // CpInfo::Package(x) => {} - _ => { None } }; if let Some(x) = resolved { self.stack.push(x); }; Ok(ExecutionResult::Continue) - }, + } Ops::ldc2_w(index) => { - let val = self.pool.get_constant(*index).ok_or(())?; + let val = self.pool.get_constant(*index)?; println!("\tLoading constant: {}", val); let resolved = match val { - CpInfo::Double(x) => { Some(Value::Double(x.clone())) } - CpInfo::Long(x) => { Some(Value::Long(x.clone())) } - _ => { None } + ConstantPoolEntry::Double(x) => Some(Value::Double(x.clone())), + ConstantPoolEntry::Long(x) => Some(Value::Long(x.clone())), + _ => None, }; if let Some(x) = resolved { self.stack.push(x); @@ -284,49 +413,111 @@ impl Frame { Ok(ExecutionResult::Continue) } // store - Ops::fstore(index) => { store!(self, f, *index as usize) }, - Ops::fstore_0 => { store!(self, f, 0) }, - Ops::fstore_1 => { store!(self, f, 1) }, - 
Ops::fstore_2 => { store!(self, f, 2) }, - Ops::fstore_3 => { store!(self, f, 3) } - Ops::dstore(index) => { store!(self, d, *index as usize) }, - Ops::dstore_0 => { store!(self, d, 0) }, - Ops::dstore_1 => { store!(self, d, 1) }, - Ops::dstore_2 => { store!(self, d, 2) }, - Ops::dstore_3 => { store!(self, d, 3) } + Ops::fstore(index) => { + store!(self, f, *index as usize) + } + Ops::fstore_0 => { + store!(self, f, 0) + } + Ops::fstore_1 => { + store!(self, f, 1) + } + Ops::fstore_2 => { + store!(self, f, 2) + } + Ops::fstore_3 => { + store!(self, f, 3) + } + Ops::dstore(index) => { + store!(self, d, *index as usize) + } + Ops::dstore_0 => { + store!(self, d, 0) + } + Ops::dstore_1 => { + store!(self, d, 1) + } + Ops::dstore_2 => { + store!(self, d, 2) + } + Ops::dstore_3 => { + store!(self, d, 3) + } // load - Ops::fload(index) => { load!(self, f, *index as usize) } - Ops::fload_0 => { load!(self, f, 0) } - Ops::fload_1 => { load!(self, f, 1) } - Ops::fload_2 => { load!(self, f, 2) } - Ops::fload_3 => { load!(self, f, 3) } - Ops::dload(index) => { load!(self, d, *index as usize) } - Ops::dload_0 => { load!(self, d, 0) } - Ops::dload_1 => { load!(self, d, 1) } - Ops::dload_2 => { load!(self, d, 2) } - Ops::dload_3 => { load!(self, d, 3) } + Ops::fload(index) => { + load!(self, f, *index as usize) + } + Ops::fload_0 => { + load!(self, f, 0) + } + Ops::fload_1 => { + load!(self, f, 1) + } + Ops::fload_2 => { + load!(self, f, 2) + } + Ops::fload_3 => { + load!(self, f, 3) + } + Ops::dload(index) => { + load!(self, d, *index as usize) + } + Ops::dload_0 => { + load!(self, d, 0) + } + Ops::dload_1 => { + load!(self, d, 1) + } + Ops::dload_2 => { + load!(self, d, 2) + } + Ops::dload_3 => { + load!(self, d, 3) + } Ops::f2d => { if let Value::Float(float) = self.stack.pop().expect("Stack must have value") { let double: f64 = float.into(); self.stack.push(Value::Double(double)); Ok(ExecutionResult::Continue) - } else { Err(()) } + } else { + Err(VmError::StackError( + 
"Popped value was not float".to_string(), + )) + } } Ops::dadd => { let value1 = self.stack.pop().expect("Stack must have value"); let value2 = self.stack.pop().expect("Stack must have value"); - if let (Value::Double(value1), Value::Double(value2)) = (value1, value2) { - self.stack.push(Value::Double(value1 + value2)); + if let (Value::Double(double1), Value::Double(double2)) = + (value1.clone(), value2.clone()) + { + self.stack.push(Value::Double(double1 + double2)); Ok(ExecutionResult::Continue) - } else { Err(()) } + } else { + Err(VmError::StackError(format!( + "{value1:?} or {value2:?} was not a double" + ))) + } } // get static field + // can init the field Ops::getstatic(index) => { - let field = self.pool.resolve_field(*index)?; + let field_ref = self.pool.resolve_field(*index)?; + println!("Getting static field {field_ref:?}"); + + let init_class = self + .thread + .get_or_resolve_class(&field_ref.class, self.thread.clone()) + .expect("TO hecken work"); + let result = init_class + .find_field(&field_ref.name, field_ref.desc) + .expect("TO hecken work"); + let constant = result.value.clone().unwrap(); + // let (code, pool) = { // let mut loader = self.vm.loader.lock().unwrap(); // let class = loader.get_or_load(&field.class).unwrap(); @@ -334,10 +525,10 @@ impl Frame { // // let code = class.get_code(meth)?; // (code, pool) // }; - println!("{:?}", field); + // println!("{:?}", field); todo!("Finish get static"); Ok(ExecutionResult::Continue) - }, + } Ops::invokevirtual(index) => { let meth = self.pool.resolve_method_ref(*index)?; @@ -347,26 +538,48 @@ impl Frame { let slice = self.stack.get(first..last).unwrap().to_vec(); //sub slice param length + one, throw it to frame new let (code, pool) = { - let mut loader = self.vm.loader.lock().unwrap(); + let mut loader = self.thread.loader.lock().unwrap(); let class = loader.get_or_load(&meth.class).unwrap(); let pool = class.constant_pool.clone(); - let code = class.get_code(meth)?; + let code = class + 
.find_method(&meth.name, meth.desc) + .unwrap() + .code + .clone() + .unwrap(); (code, pool) }; // let code = class.get_code(meth)?; // let class = self.vm.loader.get_or_load(&meth.class).unwrap(); // let pool = &class.constant_pool; let vars = slice; - let frame = Frame::new(code, pool.clone(), vars, self.vm.clone()); + let frame = Frame::new(code, pool.clone(), vars, self.thread.clone()); // println!("{:?}", meth); // todo!("Finish invoke virtual"); Ok(ExecutionResult::Continue) } - Ops::return_void => { - Ok(ExecutionResult::Return(())) + Ops::invokestatic(index) => { + let method_ref = self.pool.resolve_method_ref(*index)?; + let class = self + .thread + .get_or_resolve_class(&method_ref.class, self.thread.clone())?; + // let method_data = class + // .find_method(&method_ref.name, method_ref.desc)? + // .clone(); + + let result = self.thread.invoke(method_ref, self.thread.clone())?; + if let Some(val) = result { + self.stack.push(val) + } + // todo!("Implement invoke static {}", index) + Ok(ExecutionResult::Continue) + } + + Ops::return_void => Ok(ExecutionResult::Return(())), + _ => { + todo!("Unimplemented op: {:?}", op) } - _ => { todo!("Unimplemented op: {:?}", op) } } } -} \ No newline at end of file +} diff --git a/src/macros.rs b/src/macros.rs index a589f55..9fbd812 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,3 +1,4 @@ +use crate::class_file::constant_pool::ConstantPoolError; use crate::Value; #[macro_export] @@ -40,3 +41,16 @@ macro_rules! load { ($self:expr, l, $index:expr) => {load!($self, i, $index)}; ($self:expr, f, $index:expr) => {load!($self, i, $index)}; } + +#[macro_export] +macro_rules! 
pool_get_impl { + ($fn_name:ident => $result:ty, $variant:ident) => { + fn $fn_name(&self, index: u16) -> Result<&$result, ConstantPoolError> { + let cp_entry = self.get_constant(index)?; + match cp_entry { + ConstantPoolEntry::$variant(value) => Ok(value), + _ => Err(ConstantPoolError(format!("Expected {} constant at index {}", stringify!($variant), index))), + } + } + }; +} diff --git a/src/thread.rs b/src/thread.rs index ecb71dd..c579224 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -1,34 +1,182 @@ -use std::sync::{Arc, Mutex}; -use crate::class_file::ClassFile; +use crate::class::RuntimeClass; +use crate::class_file::{ClassFile, MethodRef}; use crate::class_loader::{ClassLoader, LoaderRef}; -use crate::Frame; use crate::vm::Vm; +use crate::{Frame, MethodDescriptor, Value, VmError}; +use deku::DekuError::Incomplete; +use std::sync::{Arc, Mutex}; + +type MethodCallResult = Result, VmError>; // A thread of execution pub struct VmThread { - vm: Arc, - loader: Arc>, - frame_stack: Vec + pub vm: Arc, + pub loader: Arc>, + pub frame_stack: Vec, } impl VmThread { - - pub fn new(vm: Arc, loader: LoaderRef, ) -> Self { + pub fn new(vm: Arc, loader: Option) -> Self { + let loader = loader.unwrap_or(vm.loader.clone()); Self { vm, loader, frame_stack: Vec::new(), } } - - - pub fn get_or_resolve_class(&self, what: &str) -> () { - let class_file = self.loader.lock().unwrap().get_or_load(what).unwrap(); - self.init(class_file) + /// Get or resolve a class, ensuring it and its dependencies are initialized. + /// Follows JVM Spec 5.5 for recursive initialization handling. 
+ pub fn get_or_resolve_class( + &self, + what: &str, + thread: Arc, + ) -> Result, VmError> { + // Phase 1: Load the class (short lock) + let runtime_class = self + .loader + .lock() + .unwrap() + .get_or_load(what) + .map_err(|e| VmError::LoaderError(e))?; + + // Phase 2: Collect classes that need initialization (short lock) + let classes_to_init = { + let mut loader = self.loader.lock().unwrap(); + let classes = loader.needs_init.clone(); + loader.needs_init.clear(); + classes + }; + + // Phase 3: Initialize each class (NO lock held - allows recursion) + for class in classes_to_init { + self.init(class, thread.clone())?; + } + + Ok(runtime_class) } - - fn init(&self, class: Arc) { - class.methods.first() + + /// Initialize a class following JVM Spec 5.5. + /// Handles recursive initialization by tracking which thread is initializing. + fn init(&self, class: Arc, thread: Arc) -> Result<(), VmError> { + use crate::class::InitState; + use std::thread; + + let current_thread = thread::current().id(); + + // Check and update initialization state + { + let mut state = class.init_state.lock().unwrap(); + match &*state { + InitState::Initialized => { + // Already initialized, nothing to do + return Ok(()); + } + InitState::Initializing(tid) if *tid == current_thread => { + // JVM Spec 5.5: Recursive initialization by same thread is allowed + println!( + "Class {} already being initialized by this thread (recursive)", + class.this_class + ); + return Ok(()); + } + InitState::Initializing(tid) => { + // Different thread is initializing - in a real JVM we'd wait + // For now, just return an error + return Err(VmError::LoaderError(format!( + "Class {} is being initialized by another thread", + class.this_class + ))); + } + InitState::Error(msg) => { + return Err(VmError::LoaderError(format!( + "Class {} initialization previously failed: {}", + class.this_class, msg + ))); + } + InitState::NotInitialized => { + // Mark as being initialized by this thread + *state = 
InitState::Initializing(current_thread); + } + } + } + + // Perform actual initialization + println!("Initializing class: {}", class.this_class); + let result = (|| { + // Initialize superclass first (if any) + if let Some(ref super_class) = class.super_class { + self.init(super_class.clone(), thread.clone())?; + } + + // Run if present + let class_init_method = class.find_method("", MethodDescriptor::void()); + if let Ok(method) = class_init_method { + Frame::new( + method.code.clone().unwrap(), + class.constant_pool.clone(), + vec![], + thread.clone(), + ) + .execute() + .map_err(|e| VmError::LoaderError(format!("Error in : {:?}", e)))?; + } + Ok(()) + })(); + + // Update state based on result + { + let mut state = class.init_state.lock().unwrap(); + match result { + Ok(_) => { + *state = InitState::Initialized; + println!("Class {} initialized successfully", class.this_class); + } + Err(ref e) => { + *state = InitState::Error(format!("{:?}", e)); + } + } + } + + result } -} \ No newline at end of file + + pub fn invoke_main(&self, what: &str, thread: Arc) { + let class = self.get_or_resolve_class(what, thread.clone()).unwrap(); + println!("invoking main: {}", class.this_class); + let main_method = class.find_method("main", MethodDescriptor::psvm()); + println!("{:?}", main_method); + if let Ok(meth) = main_method { + let mut frame = Frame::new( + meth.code.clone().unwrap(), + class.constant_pool.clone(), + vec![], + thread.clone(), + ); + // self.frame_stack.push(frame); + frame.execute().expect("Error in main"); + // self.frame_stack.first().unwrap().execute(); + } + } + + pub fn invoke(&self, method_reference: MethodRef, thread: Arc) -> MethodCallResult { + let class = self.get_or_resolve_class(&method_reference.class, thread.clone())?; + let resolved_method = class + .find_method(&method_reference.name, method_reference.desc) + .unwrap(); + if resolved_method.flags.ACC_NATIVE { + return self.invoke_native(); + } + let mut frame = Frame::new( + 
resolved_method.code.clone().unwrap(), + class.constant_pool.clone(), + vec![], + thread.clone(), + ); + frame.execute() + } + + pub fn invoke_native(&self) -> MethodCallResult { + todo!("Invoke native") + } +} diff --git a/src/vm.rs b/src/vm.rs index 9d3882c..40cc248 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,4 +1,5 @@ use std::sync::{Arc, Mutex}; +use crate::class_file::ClassFile; use crate::class_loader::ClassLoader; use crate::Frame; use crate::thread::VmThread; @@ -6,22 +7,20 @@ use crate::thread::VmThread; // struct AbstractObject<'a> {} pub struct Vm { // for now, model just a single thread - pub thread: Vec, + pub thread: Mutex>>, pub loader: Arc> } impl Vm { - pub fn new() -> Self { - Self { + // start vm, loading main from classfile + pub fn new(what: &str) -> Arc { + let vm = Arc::new(Self { loader: Arc::new(Mutex::from(ClassLoader::default())), - thread: Vec::new(), - } + thread: Mutex::new(Vec::new()), + }); + let thread = Arc::new(VmThread::new(vm.clone(), None)); + vm.thread.lock().unwrap().push(thread.clone()); + thread.invoke_main(what, thread.clone()); + vm.clone() } - - pub fn get_or_resolve_class(&self, what: &str) -> () { - let class_file = self.loader.lock().unwrap().get_or_load(what).unwrap(); - self.init(class_file) - } - - fn init() -> () {} } \ No newline at end of file