diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 59da3cca..5d44aaea 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -42,15 +42,15 @@ pub mod x86; pub const OPCODE_INVALID: u16 = u16::MAX; pub const OPCODE_DATA: u16 = u16::MAX - 1; -const SUPPORTED_ENCODINGS: [(&encoding_rs::Encoding, &str); 7] = [ +const SUPPORTED_ENCODINGS_WITH_NULL_TERM: [(&encoding_rs::Encoding, &str); 5] = [ (encoding_rs::UTF_8, "UTF-8"), (encoding_rs::SHIFT_JIS, "Shift JIS"), - (encoding_rs::UTF_16BE, "UTF-16BE"), - (encoding_rs::UTF_16LE, "UTF-16LE"), (encoding_rs::WINDOWS_1252, "Windows-1252"), (encoding_rs::EUC_JP, "EUC-JP"), (encoding_rs::BIG5, "Big5"), ]; +const SUPPORTED_ENCODINGS_NO_NULL_TERM: [(&encoding_rs::Encoding, &str); 2] = + [(encoding_rs::UTF_16BE, "UTF-16BE"), (encoding_rs::UTF_16LE, "UTF-16LE")]; /// Represents the type of data associated with an instruction #[derive(PartialEq)] @@ -174,24 +174,27 @@ impl DataType { strs.push((format!("{bytes:#?}"), None, None)); } DataType::String => { - // Special case to display (ASCII) as the label for ASCII-only strings. - let mut is_ascii = false; - if bytes.is_ascii() - && let Ok(str) = str::from_utf8(bytes) - { - let trimmed = str.trim_end_matches('\0'); - if !trimmed.is_empty() { - let copy_string = escape_special_ascii_characters(trimmed); - strs.push((trimmed.to_string(), Some("ASCII".into()), Some(copy_string))); - is_ascii = true; + if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') { + let str_bytes = &bytes[..nul_idx]; + // Special case to display (ASCII) as the label for ASCII-only strings. + let (cow, _, had_errors) = encoding_rs::UTF_8.decode(str_bytes); + if !had_errors && cow.is_ascii() { + let string = format!("{cow}"); + let copy_string = escape_special_ascii_characters(&string); + strs.push((string, Some("ASCII".into()), Some(copy_string))); + } + for (encoding, encoding_name) in SUPPORTED_ENCODINGS_WITH_NULL_TERM { + let (cow, _, had_errors) = encoding.decode(str_bytes); + // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. + if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) { + let string = format!("{cow}"); + let copy_string = escape_special_ascii_characters(&string); + strs.push((string, Some(encoding_name.into()), Some(copy_string))); + } } } - for (encoding, encoding_name) in SUPPORTED_ENCODINGS { - // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. - if is_ascii && encoding.is_ascii_compatible() { - continue; - } + for (encoding, encoding_name) in SUPPORTED_ENCODINGS_NO_NULL_TERM { let (cow, _, had_errors) = encoding.decode(bytes); if had_errors { continue; diff --git a/objdiff-core/src/arch/ppc/flow_analysis.rs b/objdiff-core/src/arch/ppc/flow_analysis.rs index 13f7273a..44b620f2 100644 --- a/objdiff-core/src/arch/ppc/flow_analysis.rs +++ b/objdiff-core/src/arch/ppc/flow_analysis.rs @@ -513,6 +513,8 @@ pub fn ppc_data_flow_analysis( ) } +// Note: This function only supports MWCC ASCII strings. +// Other encodings and other compilers are currently not supported. fn get_string_data(obj: &Object, symbol_index: usize, offset: Simm) -> Option<&str> { if let Some(sym) = obj.symbols.get(symbol_index) && sym.name.starts_with("@stringBase") diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index b8a6bab1..0cefda20 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -359,9 +359,12 @@ impl Arch for ArchPpc { fn guess_data_type(&self, resolved: ResolvedInstructionRef, bytes: &[u8]) -> Option { if resolved.relocation.is_some_and(|r| { - r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG") + r.symbol.name.starts_with("@stringBase") + || r.symbol.name.starts_with("@wstringBase") + || r.symbol.name.starts_with("$SG") + || r.symbol.demangled_name == Some("`string'".to_string()) }) { - // Pooled string. + // Compiler-generated symbol name for a string or a pool of strings. return Some(DataType::String); } let opcode = powerpc::Opcode::from(resolved.ins_ref.opcode); diff --git a/objdiff-core/tests/arch_ppc.rs b/objdiff-core/tests/arch_ppc.rs index 78aaaea9..57ae05e0 100644 --- a/objdiff-core/tests/arch_ppc.rs +++ b/objdiff-core/tests/arch_ppc.rs @@ -122,3 +122,90 @@ fn read_vmx128_coff() { let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); insta::assert_snapshot!(output); } + +#[test] +#[cfg(feature = "ppc")] +fn decode_sjis_pooled_strings() { + // Test multiple pooled Shift JIS strings separated by a single null byte between entries. (MWCC) + let diff_config = diff::DiffObjConfig { combine_data_sections: true, ..Default::default() }; + let obj = obj::read::parse( + include_object!("data/ppc/m_Do_hostIO.o"), + &diff_config, + diff::DiffSide::Base, + ) + .unwrap(); + common::assert_literal_value( + &obj, + &diff_config, + "createChild__16mDoHIO_subRoot_cFPCcP13JORReflexible", + 15, + "Shift JIS", + "危険:既に登録されているホストIOをふたたび登録しようとしています<%s>\n", + ); + common::assert_literal_value( + &obj, + &diff_config, + "createChild__16mDoHIO_subRoot_cFPCcP13JORReflexible", + 42, + "Shift JIS", + "ホストIOの空きエントリがありません。登録できませんでした。\n", + ); +} + +#[test] +#[cfg(feature = "ppc")] +fn decode_utf16_unpooled_strings() { + // Test unpooled UTF-16BE wide strings with null bytes at the start, end, and in the middle of the string. (MSVC) + let diff_config = diff::DiffObjConfig { combine_data_sections: true, ..Default::default() }; + let obj = obj::read::parse( + include_object!("data/ppc/KinectSharePanel.obj"), + &diff_config, + diff::DiffSide::Base, + ) + .unwrap(); + common::assert_literal_value( + &obj, + &diff_config, + "?OnPostLink@KinectSharePanel@@AAA?AVDataNode@@PAVDataArray@@@Z", + 84, + "UTF-16BE", + "Title Text", + ); + common::assert_literal_value( + &obj, + &diff_config, + "?OnPostLink@KinectSharePanel@@AAA?AVDataNode@@PAVDataArray@@@Z", + 120, + "UTF-16BE", + "http://www.dancecentral.com/content-assets/2012/06/2012E3LogoBox_tn.jpg", + ); +} + +#[test] +#[cfg(feature = "ppc")] +fn decode_ascii_strings_with_null_padding() { + // Test unpooled ASCII strings with more than one null byte at the end. + let diff_config = diff::DiffObjConfig { combine_data_sections: true, ..Default::default() }; + let obj = obj::read::parse( + include_object!("data/ppc/vmx128.obj"), + &diff_config, + diff::DiffSide::Base, + ) + .unwrap(); + common::assert_literal_value( + &obj, + &diff_config, + "?PrintVector@@YAXPBDU__vector4@@@Z", + 24, + "ASCII", + "%s: [%.2f, %.2f, %.2f, %.2f]\n", + ); + common::assert_literal_value( + &obj, + &diff_config, + "?ReservedRegisterExample@@YAXXZ", + 59, + "ASCII", + "Result from vr66", + ); +} diff --git a/objdiff-core/tests/common.rs b/objdiff-core/tests/common.rs index 311c622c..330a5da2 100644 --- a/objdiff-core/tests/common.rs +++ b/objdiff-core/tests/common.rs @@ -50,3 +50,25 @@ macro_rules! include_object { include_bytes_align_as!(u64, $path) }; } + +#[allow(dead_code)] +pub fn assert_literal_value( + obj: &Object, + diff_config: &DiffObjConfig, + function_name: &str, + ins_row_idx: usize, + literal_label: &str, + literal_value: &str, +) { + let symbol_idx = obj.symbols.iter().position(|s| s.name == function_name).unwrap(); + let diff = objdiff_core::diff::code::no_diff_code(obj, symbol_idx, diff_config).unwrap(); + let ins_ref = diff.instruction_rows[ins_row_idx].ins_ref.unwrap(); + let resolved = obj.resolve_instruction_ref(symbol_idx, ins_ref).unwrap(); + let literals = objdiff_core::diff::display::display_ins_data_literals(obj, resolved); + let target_literal = literals + .iter() + .find(|(_, label_override, _)| *label_override == Some(literal_label.to_string())); + assert_ne!(target_literal, None); + let (literal, _, _) = target_literal.unwrap(); + assert_eq!(literal, literal_value); +} diff --git a/objdiff-core/tests/data/ppc/KinectSharePanel.obj b/objdiff-core/tests/data/ppc/KinectSharePanel.obj new file mode 100644 index 00000000..b8571db7 Binary files /dev/null and b/objdiff-core/tests/data/ppc/KinectSharePanel.obj differ diff --git a/objdiff-wasm/src/api.rs b/objdiff-wasm/src/api.rs index 980273cc..ed1c686a 100644 --- a/objdiff-wasm/src/api.rs +++ b/objdiff-wasm/src/api.rs @@ -225,7 +225,7 @@ impl GuestDisplay for Component { let obj_diff = diff.get::(); let obj = obj_diff.0.as_ref(); let symbol_display = from_symbol_ref(symbol_ref); - diff::display::symbol_context(obj, symbol_display.symbol as usize) + diff::display::symbol_context(obj, symbol_display.symbol) .into_iter() .map(ContextItem::from) .collect() @@ -237,7 +237,7 @@ impl GuestDisplay for Component { let addend = 0; // TODO let override_color = None; // TODO: colorize replaced/deleted/inserted relocations let symbol_display = from_symbol_ref(symbol_ref); - diff::display::symbol_hover(obj, symbol_display.symbol as usize, addend, override_color) + diff::display::symbol_hover(obj, symbol_display.symbol, addend, override_color) .into_iter() .map(HoverItem::from) .collect()