From 7a7bbb2ac0740899d916b19fcdda536e9885f8b2 Mon Sep 17 00:00:00 2001 From: Ales Katona Date: Sun, 24 Jan 2021 11:14:09 -0800 Subject: [PATCH] use memmap --- Cargo.toml | 2 + src/common.rs | 31 ++++++ src/common/loadable.rs | 52 +++++---- src/common/relocatable.rs | 29 +---- src/common/section.rs | 39 ++++++- src/error.rs | 12 +- src/formats/elf/object.rs | 227 ++++++++++++++++++++++---------------- src/formats/elf/output.rs | 44 +------- src/linker.rs | 6 +- 9 files changed, 249 insertions(+), 193 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8a55991..6638170 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,8 @@ edition = "2018" [dependencies] page_size = "0.4.2" +memmap = "0.7.0" +bincode = "1.3.1" elf-utilities = { version = "0.2.8", optional = true } xmas-elf = { version = "0.7.0", optional = true } diff --git a/src/common.rs b/src/common.rs index 6f2b81c..5d10f41 100644 --- a/src/common.rs +++ b/src/common.rs @@ -4,8 +4,39 @@ mod relocatable; mod section; mod symbol; +use std::{ + convert::TryInto, + fs::File, + io::{BufWriter, Seek, SeekFrom}, + path::Path, +}; + pub use loadable::*; pub use output::*; pub use relocatable::*; pub use section::*; pub use symbol::*; + +use crate::error::Error; + +pub fn expand_path(path: &Path) -> Result<&str, Error> { + use std::io::Error as IOError; + use std::io::ErrorKind; + + path.to_str().ok_or_else(|| { + let ioe = IOError::new(ErrorKind::Other, "Path expansion fail"); + let boxed = Box::new(ioe); + Error::IOError(boxed) + }) +} + +pub fn pad_to_next_page(writer: &mut BufWriter, offset: usize) -> Result { + let page_size = page_size::get(); + + let padding = page_size - (offset % page_size); + + eprintln!("Padding from: {} with: {}", offset, padding); + writer.seek(SeekFrom::Current(padding.try_into()?))?; + + Ok(padding) +} diff --git a/src/common/loadable.rs b/src/common/loadable.rs index cf00768..7d24087 100644 --- a/src/common/loadable.rs +++ b/src/common/loadable.rs @@ -1,12 +1,7 @@ -use std::{collections::HashMap, iter::once, rc::Rc}; +use std::iter::once; +use crate::common::{Relocatable, Section, SectionInfo, Symbol}; use crate::error::Error; -use crate::{ - common::{Relocatable, Section, SectionInfo, Symbol}, - error::{LinkError, Trace, LE_GLOBAL_SYMBOL_DUPLICATE}, -}; - -use super::Binding; pub enum SegmentType { Text, @@ -35,17 +30,15 @@ impl SegmentSections { } } -// TODO: use attributes for field section names, indexes etc. #[derive(Default)] pub struct Loadable { - pub start_offset: Option, + start_offset: Option, text: SegmentSections, rodata: SegmentSections, data: SegmentSections, bss: SegmentSections, - - symbol_map: HashMap, Symbol>, + // symbol_map: HashMap, Symbol>, } impl Loadable { @@ -60,7 +53,7 @@ impl Loadable { Ok(()) } - pub fn process_symbol(&mut self, symbol: Symbol, objects: &[R]) -> Result<(), Error> + pub fn process_symbol(&mut self, _symbol: Symbol, _objects: &[R]) -> Result<(), Error> where R: Relocatable, { @@ -120,13 +113,13 @@ impl Loadable { let text_iter = self.text.iter(); let rodata_iter = self.rodata.iter(); - let data1 = text_iter.filter_map(move |si| match si.data_index { - None => None, - Some(di) => Some(objects[di.object_index].section_data(di)), + let data1 = text_iter.filter_map(move |si| match si.file_size { + 0 => None, + _ => Some(objects[si.object_index].bytes(si.offset, si.file_size)), }); - let data2 = rodata_iter.filter_map(move |si| match si.data_index { - None => None, - Some(di) => Some(objects[di.object_index].section_data(di)), + let data2 = rodata_iter.filter_map(move |si| match si.data_size { + 0 => None, + _ => Some(objects[si.object_index].bytes(si.offset, si.file_size)), }); data1.chain(data2) @@ -136,9 +129,9 @@ impl Loadable { &'l self, objects: &'l [R], ) -> impl Iterator> { - let iter = self.data.iter().filter_map(move |si| match si.data_index { - None => None, - Some(di) => Some(objects[di.object_index].section_data(di)), + let iter = self.data.iter().filter_map(move |si| match si.file_size { + 0 => None, + _ => Some(objects[si.object_index].bytes(si.offset, si.file_size)), }); iter @@ -176,4 +169,21 @@ impl Loadable { result } + + pub fn set_start_offset(&mut self, offset: u64) { + self.start_offset = Some(offset); + } + + pub fn start_offset(&self) -> Result { + use crate::error::LinkError; + + self.start_offset.ok_or_else(|| { + let link_error = LinkError { + message: "Program entrypoint not found".into(), + ..Default::default() + }; + + Error::LinkingError(link_error) + }) + } } diff --git a/src/common/relocatable.rs b/src/common/relocatable.rs index 0bd6b69..54ba98b 100644 --- a/src/common/relocatable.rs +++ b/src/common/relocatable.rs @@ -5,29 +5,6 @@ use crate::error::Error; use super::{Section, Symbol}; -#[derive(Clone, Copy)] -// index into section of given object stored in the linker -pub struct DataIndex { - pub object_index: usize, - pub section_index: usize, -} - -impl DataIndex { - pub fn new(object_index: usize, section_index: usize) -> Self { - DataIndex { - object_index, - section_index, - } - } - - pub fn for_object(object_index: usize) -> Self { - DataIndex { - object_index, - section_index: 0, - } - } -} - pub type SectionIterBox<'iter> = Box> + 'iter>; pub type SymbolIterBox<'iter> = Box + 'iter>; @@ -36,13 +13,13 @@ pub type SymbolIterBox<'iter> = Box + 'iter>; /// mushed and relocated executable from an object file pub trait Relocatable: Display + Sized { // + TryFrom { - fn new(origin: PathBuf, index: DataIndex) -> Result; + fn new(origin: PathBuf, object_index: usize) -> Result; fn origin(&self) -> &Path; // not same as section's path since this one's supposed to be cannonical fn sections(&self) -> SectionIterBox; - fn section_data(&self, section_index: DataIndex) -> Result<&[u8], Error>; - fn symbols(&self) -> SymbolIterBox; + + fn bytes(&self, offset: u64, size: u64) -> Result<&[u8], Error>; } diff --git a/src/common/section.rs b/src/common/section.rs index cae7713..171bdbc 100644 --- a/src/common/section.rs +++ b/src/common/section.rs @@ -4,13 +4,13 @@ use std::{ fmt::{Display, Formatter}, }; -use super::DataIndex; +use super::Relocatable; #[derive(Clone)] pub struct SectionInfo { + pub object_index: usize, pub file_size: u64, pub data_size: u64, - pub data_index: Option, // some form of indexing to the source data (Relocatable's data) pub offset: u64, } @@ -21,6 +21,39 @@ pub enum Section { Bss(SectionInfo), } +impl Section { + pub fn data<'o, R>(&self, object: &'o R) -> Result<&'o [u8], Error> + where + R: Relocatable, + { + object.bytes(self.offset(), self.file_size()) + } + + pub fn object_index(&self) -> usize { + self.info().object_index + } + + pub fn offset(&self) -> u64 { + self.info().offset + } + + pub fn data_size(&self) -> u64 { + self.info().data_size + } + + pub fn file_size(&self) -> u64 { + self.info().file_size + } + + fn info(&self) -> &SectionInfo { + match self { + Section::Text(si) => si, + Section::Data(si, _) => si, + Section::Bss(si) => si, + } + } +} + impl<'data> TryFrom<(&str, SectionInfo)> for Section { type Error = Error; @@ -46,7 +79,7 @@ impl Display for SectionInfo { "file_size: {}, data_size: {}, data: {}, offset: {}", self.file_size, self.data_size, - self.data_index.is_some(), + self.data_size > 0, self.offset, ) } diff --git a/src/error.rs b/src/error.rs index 024191b..3ae39bd 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,15 +3,17 @@ use std::{ path::PathBuf, }; -pub const LE_GLOBAL_SYMBOL_DUPLICATE: u32 = 701; +// pub const LE_GLOBAL_SYMBOL_DUPLICATE: u32 = 10001; #[derive(Debug)] pub enum Error { IOError(Box), + DataError(PathBuf), // generic "something wrong with data" error, not specific to parsing InvalidObjectType(u32), InvalidSectionName, InvalidSectionData, - ParseError(Box), + InvalidInput, // input is completely wrong (e.g. no magic bytes etc.) + ParseError(Box), // error on parsing input TryFromIntError, LinkingError(LinkError), } @@ -26,7 +28,7 @@ pub struct Trace { // "backtrace" for error origin #[derive(Debug, Default)] pub struct LinkError { - pub code: u32, // specific trace code, should be >= 700 + pub code: u32, // specific trace code, should be >= 10000 pub message: String, pub traces: Vec, } @@ -36,11 +38,13 @@ impl Error { pub fn code(&self) -> u32 { match self { Error::IOError(_) => 100, + Error::DataError(_) => 800, Error::InvalidObjectType(_) => 200, Error::InvalidSectionName => 300, Error::InvalidSectionData => 400, Error::ParseError(_) => 500, Error::TryFromIntError => 600, + Error::InvalidInput => 700, Error::LinkingError(le) => le.code, } } @@ -70,11 +74,13 @@ impl Display for Error { match self { Error::IOError(err) => write!(f, "IO error: {}", err), + Error::DataError(origin) => write!(f, "Data error in {:?}", origin), Error::InvalidObjectType(ot) => write!(f, "Invalid object type: {}", ot), Error::InvalidSectionName => write!(f, "Invalid section name"), Error::InvalidSectionData => write!(f, "Invalid section data"), Error::ParseError(err) => write!(f, "Parse error: {}", err), Error::TryFromIntError => write!(f, "Integer conversion error"), + Error::InvalidInput => write!(f, "Input object invalid"), Error::LinkingError(trace) => write!(f, "Linking error: {}", trace), } } diff --git a/src/formats/elf/object.rs b/src/formats/elf/object.rs index 1753edb..f90e6b0 100644 --- a/src/formats/elf/object.rs +++ b/src/formats/elf/object.rs @@ -1,22 +1,24 @@ use std::{ - convert::TryFrom, fmt::Display, + fs::File, path::{Path, PathBuf}, }; -use crate::common::{Binding, DataIndex, Relocatable, Section, SectionInfo, Symbol, SymbolIterBox}; +use crate::common::{Binding, Relocatable, Section, SectionInfo, Symbol, SymbolIterBox}; use crate::{common::SectionIterBox, error::Error}; use elf_utilities::{ - file::ELF64, - parser::read_elf64, - section::{Contents64, Type}, + header::Ehdr64, + section::Shdr64, symbol::{Bind, Symbol64}, }; +use memmap::Mmap; pub struct ElfObject { object_index: usize, origin: PathBuf, - elf: ELF64, + data: Mmap, + ehdr: Ehdr64, + sh_name_offset: usize, } impl ElfObject { @@ -25,21 +27,67 @@ impl ElfObject { .as_path() .to_str() .ok_or(Error::InvalidObjectType(404))?; - let elf = match read_elf64(str_origin) { - Ok(val) => val, - Err(err) => return Err(Error::ParseError(err)), - }; - is_relocatable(&elf)?; + let file = File::open(str_origin)?; + let data = unsafe { Mmap::map(&file)? }; + let ehdr = parse_elf_header(&data)?; + let shstrtab = parse_shstrtab(&data, &ehdr)?; + let result = ElfObject { object_index, origin, - elf, + data, + ehdr, + sh_name_offset: shstrtab.sh_offset as usize, }; Ok(result) } + fn section_name(&self, shdr: &Shdr64) -> Result<&str, Error> { + let idx: usize = shdr.sh_name as usize; + let start = self.sh_name_offset + idx; + + let mut i = start; + while self.data[i] != 0 { + i += 1; + // sanity check + if i - start > 1024 { + return Err(Error::DataError(self.origin.clone())); + } + } + + // TODO: consider unchecked? + std::str::from_utf8(&self.data[start..i]).map_err(|err| Error::ParseError(err.into())) + } + + fn make_section(&self, offset: usize, sh: &Shdr64) -> Option> { + let name = match self.section_name(sh) { + Ok(n) => n, + Err(err) => return Some(Err(err)), + }; + + let mut si = SectionInfo { + object_index: self.object_index, + file_size: sh.sh_size, + data_size: sh.sh_size, + offset: offset as u64, + }; + + if name.starts_with(".bss") { + si.file_size = 0; + Some(Ok(Section::Bss(si))) + } else if name.starts_with(".text") { + Some(Ok(Section::Text(si))) + } else if name.starts_with(".rodata") { + Some(Ok(Section::Data(si, true))) + } else if name.starts_with(".data") { + Some(Ok(Section::Data(si, false))) + } else { + None + } + } + fn make_symbol(&self, s64: &Symbol64, strtab: &[u8]) -> Symbol { let binding = match s64.get_bind() { Bind::Global => Binding::Global, @@ -59,8 +107,8 @@ impl ElfObject { } impl Relocatable for ElfObject { - fn new(origin: PathBuf, di: DataIndex) -> Result { - ElfObject::new(origin, di.object_index) + fn new(origin: PathBuf, object_index: usize) -> Result { + ElfObject::new(origin, object_index) } fn origin(&self) -> &Path { @@ -68,92 +116,62 @@ impl Relocatable for ElfObject { } fn sections(&self) -> SectionIterBox { - let iter = self - .elf - .sections - .iter() - .enumerate() - .filter_map(move |(i, s)| match s.header.get_type() { - Type::ProgBits => { - if s.header.sh_size > 0 { - if match &s.contents { - Contents64::Raw(v) => Some(v), - _ => None, - } - .is_some() - { - let si = SectionInfo { - file_size: s.header.sh_size, - data_size: s.header.sh_size, - data_index: Some(DataIndex::new(self.object_index, i)), - offset: s.header.sh_offset, - }; - let s_name: &str = &s.name; - - match Section::try_from((s_name, si)) { - Ok(s) => Some(Ok(s)), - Err(Error::InvalidSectionName) => None, // skip - Err(err) => Some(Err(err)), - } - } else { - Some(Err(Error::InvalidSectionData)) - } - } else { - None - } - } - Type::NoBits => Some(Ok(Section::Bss(SectionInfo { - file_size: 0, - data_size: s.header.sh_size, - data_index: None, - offset: s.header.sh_offset, - }))), - _ => None, - }); + let iter = (0..self.ehdr.e_shnum).into_iter().filter_map(move |i| { + let i_usize = usize::from(i); + let sh_start = self.ehdr.e_shoff as usize; + let sh_size = usize::from(self.ehdr.e_shentsize); + + let offset: usize = sh_start + i_usize * sh_size; + let shr: Result = + bincode::deserialize(&self.data[offset..offset + sh_size]) + .map_err(|err| Error::ParseError(err)); + + match shr { + Err(err) => Some(Err(err)), + Ok(sh) => self.make_section(offset, &sh), + } + }); Box::new(iter) } - fn section_data(&self, index: DataIndex) -> Result<&[u8], Error> { - let section = &self.elf.sections[index.section_index]; - - match §ion.contents { - Contents64::Raw(v) => Ok(&v), - _ => Err(Error::InvalidSectionData), - } + fn bytes(&self, offset: u64, size: u64) -> Result<&[u8], Error> { + let o = offset as usize; + let s = size as usize; + Ok(&self.data[o..o + s]) } fn symbols(&self) -> SymbolIterBox { - if let Some(strtab_section) = self - .elf - .first_section_by(|s64| s64.header.get_type() == Type::StrTab && s64.name == ".strtab") - { - let strtab = match &strtab_section.contents { - Contents64::Raw(bytes) => bytes, - _ => panic!("Unexpected strtab content type"), - }; - - let iter = self - .elf - .sections - .iter() - .filter_map(move |s| match &s.contents { - Contents64::Symbols(symbols) => { - Some(symbols.iter().filter_map(move |sym| match sym.get_bind() { - Bind::Global | Bind::Local | Bind::Weak => { - Some(self.make_symbol(sym, strtab)) - } - _ => None, - })) - } - _ => None, - }) - .flatten(); - - Box::new(iter) - } else { - Box::new(std::iter::empty()) - } + // if let Some(strtab_section) = self + // .elf + // .first_section_by(|s64| s64.header.get_type() == Type::StrTab && s64.name == ".strtab") + // { + // let strtab = match &strtab_section.contents { + // Contents64::Raw(bytes) => bytes, + // _ => panic!("Unexpected strtab content type"), + // }; + + // let iter = self + // .elf + // .sections + // .iter() + // .filter_map(move |s| match &s.contents { + // Contents64::Symbols(symbols) => { + // Some(symbols.iter().filter_map(move |sym| match sym.get_bind() { + // Bind::Global | Bind::Local | Bind::Weak => { + // Some(self.make_symbol(sym, strtab)) + // } + // _ => None, + // })) + // } + // _ => None, + // }) + // .flatten(); + + // Box::new(iter) + // } else { + Box::new(std::iter::empty()) + //} } } @@ -188,12 +206,27 @@ fn parse_strtab_name(strtab: &[u8], idx: u32) -> String { .to_string() } -fn is_relocatable(elf: &ELF64) -> Result<(), Error> { +fn parse_elf_header(data: &Mmap) -> Result { use elf_utilities::header::Type; - if elf.ehdr.get_type() != Type::Rel { - return Err(Error::InvalidObjectType(elf.ehdr.e_type.into())); + let magic = &data[0..4]; + if magic != &[0x7f, 0x45, 0x4c, 0x46] { + return Err(Error::InvalidInput); + } + + let ehdr = Ehdr64::deserialize(data, 0).map_err(|err| Error::ParseError(err))?; + if ehdr.get_type() != Type::Rel { + return Err(Error::InvalidObjectType(u32::from(ehdr.e_type))); } - Ok(()) + Ok(ehdr) +} + +fn parse_shstrtab(data: &Mmap, ehdr: &Ehdr64) -> Result { + let idx: usize = ehdr.e_shstrndx.into(); + let sh_start: usize = ehdr.e_shoff as usize; + let sh_size: usize = ehdr.e_shentsize.into(); + let offset: usize = sh_start + sh_size * idx; + + bincode::deserialize(&data[offset..offset + sh_size]).map_err(|err| Error::ParseError(err)) } diff --git a/src/formats/elf/output.rs b/src/formats/elf/output.rs index d0d9ef7..08164d3 100644 --- a/src/formats/elf/output.rs +++ b/src/formats/elf/output.rs @@ -1,10 +1,9 @@ use std::{ - convert::TryInto, fs::File, io::BufWriter, - io::{Seek, SeekFrom, Write}, + io::{Write}, mem::size_of, - path::{Path, PathBuf}, + path::{PathBuf}, }; use elf_utilities::{ @@ -14,7 +13,7 @@ use elf_utilities::{ }; use crate::{ - common::{Loadable, Output, SegmentType}, + common::{expand_path, pad_to_next_page, Loadable, Output, SegmentType}, error::Error, }; @@ -50,7 +49,7 @@ impl Output for ElfOutput { ehdr.e_shnum = Loadable::section_count() as u16 + 1; // +1 for .shstrtab ehdr.e_phnum = Loadable::segment_count() as u16; ehdr.e_shstrndx = ehdr.e_shnum - 1; // .shstrab is always last - ehdr.e_entry = get_start_offset(loadable)?; + ehdr.e_entry = loadable.start_offset()?; ehdr.e_phentsize = PHS; ehdr.e_shentsize = SHS; ehdr.e_phoff = EHS; @@ -214,38 +213,3 @@ fn make_strtab() -> (Vec, Vec) { (strtab_bytes, indexes) } - -fn get_start_offset(loadable: &Loadable) -> Result { - use crate::error::LinkError; - - loadable.start_offset.ok_or_else(|| { - let link_error = LinkError { - message: "Program entrypoint not found".into(), - ..Default::default() - }; - - Error::LinkingError(link_error) - }) -} - -fn expand_path(path: &Path) -> Result<&str, Error> { - use std::io::Error as IOError; - use std::io::ErrorKind; - - path.to_str().ok_or_else(|| { - let ioe = IOError::new(ErrorKind::Other, "Path expansion fail"); - let boxed = Box::new(ioe); - Error::IOError(boxed) - }) -} - -fn pad_to_next_page(writer: &mut BufWriter, offset: usize) -> Result { - let page_size = page_size::get(); - - let padding = page_size - (offset % page_size); - - eprintln!("Padding from: {} with: {}", offset, padding); - writer.seek(SeekFrom::Current(padding.try_into()?))?; - - Ok(padding) -} diff --git a/src/linker.rs b/src/linker.rs index 0c09b7f..fcac3d5 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -1,7 +1,7 @@ use std::{fmt::Display, path::PathBuf}; use crate::{ - common::{DataIndex, Loadable, Output, Relocatable}, + common::{Loadable, Output, Relocatable}, error::Error, }; @@ -29,7 +29,7 @@ where } pub fn add_relocatable(&mut self, origin: PathBuf) -> Result<(), Error> { - let r = R::new(origin, DataIndex::for_object(self.relocatables.len()))?; + let r = R::new(origin, self.relocatables.len())?; // TODO: parallelize? for section in r.sections() { @@ -44,7 +44,7 @@ where pub fn link(mut self) -> Result { self.process_symbols()?; - self.loadable.start_offset = Some(4096); // TODO: get from .start symbol location + self.loadable.set_start_offset(4096); // TODO: get from .start symbol location self.output.finalize(&self.relocatables, &self.loadable) }