From c65e504178a0a2bb44e392b13ad704aa15e85e12 Mon Sep 17 00:00:00 2001 From: Ales Katona Date: Thu, 4 Feb 2021 15:54:07 -0800 Subject: [PATCH] WIP --- src/common/loadable.rs | 61 +++++++++++++++--- src/common/output.rs | 6 +- src/common/relocatable.rs | 8 ++- src/common/section.rs | 1 + src/common/symbol.rs | 74 ++++++++++++++++------ src/formats/elf/object.rs | 130 +++++++++++++++++++++++++------------- src/formats/elf/output.rs | 54 +++++++++++++--- src/linker.rs | 12 +++- 8 files changed, 258 insertions(+), 88 deletions(-) diff --git a/src/common/loadable.rs b/src/common/loadable.rs index dccca7c..caa2323 100644 --- a/src/common/loadable.rs +++ b/src/common/loadable.rs @@ -7,6 +7,8 @@ use crate::{ error::LinkError, }; +use super::SymbolValue; + pub enum SegmentType { Text, Data, @@ -62,11 +64,28 @@ impl SegmentSections { pub fn data_size(&self) -> u64 { self.data_size } + + pub fn unresolved_offsets<'s>( + &'s self, + symbols: &'s Symbols, + ) -> impl Iterator + 's { + symbols + .unresolved() + .map(move |si| { + self.sections_info.iter().filter_map(move |section| { + match section.si.section_index == si.section_index { + true => Some(section.output_offset + si.offsets.symbol_offset), + false => None, + } + }) + }) + .flatten() + } } #[derive(Default)] pub struct Loadable { - start_offset: Option, + start_offset: Option, text: SegmentSections, rodata: SegmentSections, @@ -100,12 +119,8 @@ impl Loadable { } let name = object.symbol_name(si.symbol_index)?; - eprintln!("NAME: {}/{:?}", name, object.symbol_value(si.symbol_index)?); - - let si = SymbolIndex { - object_index: object.object_index(), - symbol_index: si.symbol_index, - }; + eprint!("OBJECT: {:?}\nNAME: {}/", object.origin(), name); + eprintln!("{:?}", object.symbol_value(si.symbol_index)?); if let Some(existing) = self.symbol_map.get_mut(name) { existing.push(si, objects) @@ -188,11 +203,11 @@ impl Loadable { result } - pub fn set_start_offset(&mut self, offset: u64) { + pub fn set_start_offset(&mut self, offset: usize) { self.start_offset = Some(offset); } - pub fn start_offset(&self) -> Result { + pub fn start_offset(&self) -> Result { self.start_offset.ok_or_else(|| { let link_error = LinkError { message: "Program entrypoint not found".into(), @@ -202,4 +217,32 @@ impl Loadable { Error::LinkingError(link_error) }) } + + pub fn symbol_resolutions<'o, R>( + &'o self, + objects: &'o [R], + ) -> impl Iterator> + where + R: Relocatable, + { + self.symbol_map.values().map(move |symbols| { + let value = symbols.value(objects).expect("value"); // TODO + let offsets = self.unresolved_offsets(symbols).collect(); + + SymbolResolution { value, offsets } + }) + } + + fn unresolved_offsets<'s>(&'s self, symbols: &'s Symbols) -> impl Iterator + 's { + self.text + .unresolved_offsets(symbols) + .chain(self.rodata.unresolved_offsets(symbols)) + .chain(self.data.unresolved_offsets(symbols)) + .chain(self.bss.unresolved_offsets(symbols)) + } +} + +pub struct SymbolResolution<'b> { + pub value: SymbolValue<'b>, + pub offsets: Vec, // TODO: box iterator instead } diff --git a/src/common/output.rs b/src/common/output.rs index 4de868e..2ab9cc6 100644 --- a/src/common/output.rs +++ b/src/common/output.rs @@ -7,5 +7,9 @@ pub trait Output where R: Relocatable, { - fn finalize(self, objects: &[R], loadable: &mut Loadable) -> Result; + fn relocate_sections(&mut self, objects: &[R], loadable: &mut Loadable) -> Result<(), Error>; + + fn resolve_symbols(&mut self, objects: &[R], loadable: &mut Loadable) -> Result<(), Error>; + + fn finalize(self) -> Result; } diff --git a/src/common/relocatable.rs b/src/common/relocatable.rs index 53e7879..55472ad 100644 --- a/src/common/relocatable.rs +++ b/src/common/relocatable.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, path::PathBuf}; use crate::error::Error; -use super::Section; +use super::{Section, SymbolOffsets, SymbolValue}; pub type SectionIterBox<'iter> = Box> + 'iter>; @@ -23,9 +23,11 @@ pub trait Relocatable: Display + Sized { fn symbol_name(&self, index: usize) -> Result<&str, Error>; - fn symbol_value(&self, index: usize) -> Result<&[u8], Error>; + fn symbol_value(&self, index: usize) -> Result; - fn symbol_file_offset(&self, index: usize) -> Result; + fn symbol_offsets(&self, index: usize) -> Result; + + fn symbol_section_index(&self, index: usize) -> Result; fn symbol_needs_resolving(&self, index: usize) -> bool; diff --git a/src/common/section.rs b/src/common/section.rs index 171bdbc..7acb3d4 100644 --- a/src/common/section.rs +++ b/src/common/section.rs @@ -9,6 +9,7 @@ use super::Relocatable; #[derive(Clone)] pub struct SectionInfo { pub object_index: usize, + pub section_index: usize, pub file_size: u64, pub data_size: u64, pub offset: u64, diff --git a/src/common/symbol.rs b/src/common/symbol.rs index afa5420..4443b9c 100644 --- a/src/common/symbol.rs +++ b/src/common/symbol.rs @@ -7,7 +7,28 @@ use super::Relocatable; #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] pub struct SymbolIndex { pub object_index: usize, + pub section_index: usize, pub symbol_index: usize, + pub offsets: SymbolOffsets, +} + +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +pub struct SymbolOffsets { + pub section_offset: usize, // within the source relocatable + pub symbol_offset: usize, // within the section +} + +impl SymbolOffsets { + pub fn file_offset(&self) -> usize { + self.section_offset + self.symbol_offset + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SymbolValue<'o> { + Undefined, + Address(usize), + Value(&'o [u8]), } #[derive(Default)] @@ -28,38 +49,52 @@ impl Symbols { self.indexes.push(si); - if value.len() > 0 { - if let Some(existing) = self.value_index { - let osi = self - .indexes - .get(existing) - .ok_or(Error::InvalidSymbolIndex)?; - Self::duplicate_symbol_error(si, *osi, objects) - } else { - self.value_index = Some(self.indexes.len() - 1); + // save "value index" if we found a value + match value { + SymbolValue::Value(_) | SymbolValue::Address(_) => { + if let Some(existing) = self.value_index { + let osi = self + .indexes + .get(existing) + .ok_or(Error::InvalidSymbolIndex)?; + Self::duplicate_symbol_error(si, *osi, objects) + } else { + self.value_index = Some(self.indexes.len() - 1); - Ok(()) + Ok(()) + } } - } else { - Ok(()) + SymbolValue::Undefined => Ok(()), } } - pub fn value<'o, R>(&self, object: &'o R) -> Result<&'o [u8], Error> + pub fn value<'o, R>(&self, objects: &'o [R]) -> Result, Error> where R: Relocatable, { - self.value_index.map_or(Ok(&[]), |v| { + self.value_index.map_or(Ok(SymbolValue::Undefined), |v| { let si = self.indexes[v]; // can this be compile time checked?? - if si.object_index != object.object_index() { - Err(Error::InvalidObjectIndex) - } else { + if let Some(object) = objects.get(si.object_index) { object.symbol_value(si.symbol_index) + } else { + Err(Error::InvalidObjectIndex) } }) } + pub fn unresolved(&self) -> impl Iterator { + let vi = self.value_index.unwrap_or(std::usize::MAX); + + self.indexes + .iter() + .enumerate() + .filter_map(move |(i, s)| match i == vi { + true => None, + false => Some(s), + }) + } + fn duplicate_symbol_error( si: SymbolIndex, osi: SymbolIndex, @@ -78,12 +113,13 @@ impl Symbols { traces.push(Trace { origin: object.origin().into(), - offset: object.symbol_file_offset(si.symbol_index)?, + offset: object.symbol_offsets(si.symbol_index)?.file_offset(), source_info: None, }); + traces.push(Trace { origin: other.origin().into(), - offset: other.symbol_file_offset(osi.symbol_index)?, + offset: other.symbol_offsets(osi.symbol_index)?.file_offset(), source_info: None, }); diff --git a/src/formats/elf/object.rs b/src/formats/elf/object.rs index 7dc391c..51b4158 100644 --- a/src/formats/elf/object.rs +++ b/src/formats/elf/object.rs @@ -4,7 +4,7 @@ use std::{ path::{Path, PathBuf}, }; -use crate::common::{Relocatable, Section, SectionInfo}; +use crate::common::{Relocatable, Section, SectionInfo, SymbolOffsets, SymbolValue}; use crate::{common::SectionIterBox, error::Error}; use elf_utilities::{ header::Ehdr64, @@ -13,13 +13,19 @@ use elf_utilities::{ }; use memmap::Mmap; +struct ElfSymbol { + s64: Symbol64, + sh_index: usize, + rel_offset: usize, +} + pub struct ElfObject { object_index: usize, origin: PathBuf, data: Mmap, ehdr: Ehdr64, shdrs: Vec, - symbols: Vec<(usize, Symbol64)>, + symbols: Vec, } impl ElfObject { @@ -48,15 +54,22 @@ impl ElfObject { Ok(result) } - fn resolve_symbol_value(&self, symbol_index: usize) -> Result<&[u8], Error> { - if let Some((_, s64)) = self.symbols.get(symbol_index) { - if s64.st_size == 0 { - Ok(&[]) - } else if let Some(shdr) = self.shdrs.get(s64.st_shndx as usize) { - let start: usize = (shdr.sh_offset + s64.st_value) as usize; - let end: usize = start + s64.st_size as usize; - - Ok(&self.data[start..end]) + fn resolve_symbol_value(&self, symbol_index: usize) -> Result { + if let Some(symbol) = self.symbols.get(symbol_index) { + let idx = symbol.s64.st_shndx as usize; + if idx == 0 { + Ok(SymbolValue::Undefined) + } else if let Some(shdr) = self.shdrs.get(idx) { + let sym_size = symbol.s64.st_size as usize; + let start: usize = (shdr.sh_offset + symbol.s64.st_value) as usize; + + if sym_size > 0 { + let end: usize = start + sym_size; + + Ok(SymbolValue::Value(&self.data[start..end])) + } else { + Ok(SymbolValue::Address(start)) + } } else { Err(Error::InvalidSectionIndex) } @@ -104,7 +117,8 @@ impl ElfObject { } fn populate(&mut self) -> Result<(), Error> { - for i in 0..self.ehdr.e_shnum { + for i in 1..self.ehdr.e_shnum { + // skip null section let i_usize: usize = i.into(); let sh_start = self.ehdr.e_shoff as usize; let sh_size = usize::from(self.ehdr.e_shentsize); @@ -113,24 +127,10 @@ impl ElfObject { let shdr: Shdr64 = bincode::deserialize(&self.data[offset..offset + sh_size]) .map_err(|err| Error::ParseError(err))?; - if shdr.get_type() == Type::SymTab { - let ent_size = shdr.sh_entsize as usize; - let offset = shdr.sh_offset as usize; - let count = shdr.sh_size as usize / ent_size; - - for s_i in 1..count { - // skip null-symbol - let start = offset + s_i * ent_size; - let s64 = Symbol64::deserialize(&self.data, start) - .map_err(|err| Error::ParseError(err))?; - - match s64.get_type() { - SymType::Object | SymType::Func | SymType::NoType => { - self.symbols.push((shdr.sh_link as usize, s64)) - } - _ => {} - } - } + match shdr.get_type() { + Type::SymTab => self.populate_symtab(&shdr, i as usize)?, + Type::Rel | Type::Rela => self.populate_rela(&shdr)?, + _ => {} } self.shdrs.push(shdr); @@ -139,7 +139,36 @@ impl ElfObject { Ok(()) } - fn make_section(&self, shdr: &Shdr64) -> Option> { + fn populate_symtab(&mut self, shdr: &Shdr64, sh_index: usize) -> Result<(), Error> { + let ent_size = shdr.sh_entsize as usize; + let offset = shdr.sh_offset as usize; + let count = shdr.sh_size as usize / ent_size; + + for s_i in 1..count { + // skip null-symbol + let start = offset + s_i * ent_size; + let s64 = + Symbol64::deserialize(&self.data, start).map_err(|err| Error::ParseError(err))?; + + match s64.get_type() { + SymType::Object | SymType::Func | SymType::NoType => self.symbols.push(ElfSymbol { + sh_index, + s64, + rel_offset: 0, + }), + _ => {} + } + } + + Ok(()) + } + + fn populate_rela(&mut self, shdr: &Shdr64) -> Result<(), Error> { + // TODO + Ok(()) + } + + fn make_section(&self, shdr: &Shdr64, section_index: usize) -> Option> { let sh_index: usize = self.ehdr.e_shstrndx.into(); let name = match self.resolve_name(shdr.sh_name as usize, sh_index) { Ok(n) => n, @@ -148,6 +177,7 @@ impl ElfObject { let mut si = SectionInfo { object_index: self.object_index, + section_index, file_size: shdr.sh_size, data_size: shdr.sh_size, offset: shdr.sh_offset, @@ -182,13 +212,14 @@ impl Relocatable for ElfObject { } fn sections(&self) -> SectionIterBox { - let iter = self - .shdrs - .iter() - .filter_map(move |shdr| match shdr.get_type() { - Type::ProgBits | Type::NoBits => self.make_section(shdr), - _ => None, - }); + let iter = + self.shdrs + .iter() + .enumerate() + .filter_map(move |(i, shdr)| match shdr.get_type() { + Type::ProgBits | Type::NoBits => self.make_section(shdr, i), + _ => None, + }); Box::new(iter) } @@ -205,30 +236,39 @@ impl Relocatable for ElfObject { fn symbol_name(&self, index: usize) -> Result<&str, Error> { if let Some(symbol) = self.symbols.get(index) { - self.resolve_name(symbol.1.st_name as usize, symbol.0) + self.resolve_name(symbol.s64.st_name as usize, symbol.sh_index) } else { Err(Error::InvalidSymbolIndex) } } - fn symbol_value(&self, index: usize) -> Result<&[u8], Error> { + fn symbol_value(&self, index: usize) -> Result { self.resolve_symbol_value(index) } - fn symbol_file_offset(&self, index: usize) -> Result { - let (sh_index, s64) = self.symbols.get(index).ok_or(Error::InvalidSymbolIndex)?; + fn symbol_section_index(&self, index: usize) -> Result { + let symbol = self.symbols.get(index).ok_or(Error::InvalidSymbolIndex)?; + + Ok(symbol.sh_index) + } + + fn symbol_offsets(&self, index: usize) -> Result { + let symbol = self.symbols.get(index).ok_or(Error::InvalidSymbolIndex)?; let shdr = self .shdrs - .get(*sh_index) + .get(symbol.sh_index) .ok_or(Error::InvalidSectionIndex)?; let sh_offset: usize = shdr.sh_offset as usize; - Ok(sh_offset + s64.st_value as usize) + Ok(SymbolOffsets { + section_offset: sh_offset, + symbol_offset: symbol.rel_offset, + }) } fn symbol_needs_resolving(&self, index: usize) -> bool { - match self.symbols[index].1.get_bind() { + match self.symbols[index].s64.get_bind() { Bind::Global | Bind::Weak => true, _ => false, } diff --git a/src/formats/elf/output.rs b/src/formats/elf/output.rs index e295cc7..f43cf72 100644 --- a/src/formats/elf/output.rs +++ b/src/formats/elf/output.rs @@ -1,4 +1,10 @@ -use std::{fs::File, io::BufWriter, io::Write, mem::size_of, path::PathBuf}; +use std::{ + fs::File, + io::BufWriter, + io::{Seek, SeekFrom, Write}, + mem::size_of, + path::PathBuf, +}; use elf_utilities::{ header::Ehdr64, @@ -7,7 +13,9 @@ use elf_utilities::{ }; use crate::{ - common::{expand_path, pad_to_next_page, Loadable, Output, Relocatable, SegmentType}, + common::{ + expand_path, pad_to_next_page, Loadable, Output, Relocatable, SegmentType, SymbolValue, + }, error::Error, }; @@ -58,11 +66,11 @@ impl ElfOutput { } impl Output for ElfOutput { - fn finalize( - mut self, + fn relocate_sections( + &mut self, objects: &[ElfObject], loadable: &mut Loadable, - ) -> Result { + ) -> Result<(), Error> { const EHS: u64 = size_of::() as u64; const PHS: u16 = size_of::() as u16; const SHS: u16 = size_of::() as u16; @@ -72,7 +80,7 @@ impl Output for ElfOutput { ehdr.e_shnum = SECTION_NAMES.len() as u16; ehdr.e_phnum = 3u16; // .text, .rodata + .data, .bss ehdr.e_shstrndx = ehdr.e_shnum - 1; // .shstrab is always last - ehdr.e_entry = loadable.start_offset()?; + ehdr.e_entry = loadable.start_offset()? as u64; ehdr.e_phentsize = PHS; ehdr.e_shentsize = SHS; ehdr.e_phoff = EHS; @@ -124,9 +132,7 @@ impl Output for ElfOutput { let bytes = object.bytes(si.offset, si.file_size)?; offset += self.writer.write(bytes)?; } - // for bytes in loadable.program_bytes(objects) { - // offset += self.writer.write(bytes?)?; - // } + offset += pad_to_next_page(&mut self.writer, offset)?; eprintln!("Data start: {}", offset); @@ -139,6 +145,36 @@ impl Output for ElfOutput { offset += self.writer.write(bytes)?; } + Ok(()) + } + + fn resolve_symbols( + &mut self, + objects: &[ElfObject], + loadable: &mut Loadable, + ) -> Result<(), Error> { + for resolution in loadable.symbol_resolutions(objects) { + for offset in resolution.offsets { + match resolution.value { + SymbolValue::Value(v) => { + eprintln!("Writing value {:?} to {}", resolution.value, offset); + self.writer.seek(SeekFrom::Start(offset as u64))?; + self.writer.write(v)?; + } + SymbolValue::Address(a) => { + eprintln!("TODO"); + } + SymbolValue::Undefined => { + eprintln!("TODO2"); + } + } + } + } + + Ok(()) + } + + fn finalize(mut self) -> Result { self.writer.flush()?; Ok(self.destination) diff --git a/src/linker.rs b/src/linker.rs index d7f3efa..e819a13 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -44,8 +44,14 @@ where pub fn link(mut self) -> Result { self.process_symbols()?; - self.loadable.set_start_offset(4096); // TODO: get from .start symbol location - self.output.finalize(&self.objects, &mut self.loadable) + self.loadable.set_start_offset(4096); // TODO + + self.output + .relocate_sections(&self.objects, &mut self.loadable)?; + self.output + .resolve_symbols(&self.objects, &mut self.loadable)?; + + self.output.finalize() } fn process_symbols(&mut self) -> Result<(), Error> { @@ -55,7 +61,9 @@ where for i in 0..symbol_count { let si = SymbolIndex { object_index: r.object_index(), + section_index: r.symbol_section_index(i)?, symbol_index: i, + offsets: r.symbol_offsets(i)?, }; self.loadable.process_symbol(si, &self.objects)?; }