use memmap

master
Ales Katona 4 years ago
parent 47e4104247
commit 7a7bbb2ac0
Signed by: almindor
GPG Key ID: 2F773149BF38B48F

@ -8,6 +8,8 @@ edition = "2018"
[dependencies]
page_size = "0.4.2"
memmap = "0.7.0"
bincode = "1.3.1"
elf-utilities = { version = "0.2.8", optional = true }
xmas-elf = { version = "0.7.0", optional = true }

@ -4,8 +4,39 @@ mod relocatable;
mod section;
mod symbol;
use std::{
convert::TryInto,
fs::File,
io::{BufWriter, Seek, SeekFrom},
path::Path,
};
pub use loadable::*;
pub use output::*;
pub use relocatable::*;
pub use section::*;
pub use symbol::*;
use crate::error::Error;
pub fn expand_path(path: &Path) -> Result<&str, Error> {
use std::io::Error as IOError;
use std::io::ErrorKind;
path.to_str().ok_or_else(|| {
let ioe = IOError::new(ErrorKind::Other, "Path expansion fail");
let boxed = Box::new(ioe);
Error::IOError(boxed)
})
}
/// Seeks `writer` forward from its current position to the next page boundary.
///
/// `offset` is the position used to compute the distance to the boundary;
/// the page size comes from `page_size::get()`. Note that an `offset` that
/// is already a multiple of the page size results in a full page of padding.
///
/// Returns the number of bytes skipped.
///
/// # Errors
///
/// Fails when the padding does not convert to the seek offset type or when
/// the seek itself fails.
pub fn pad_to_next_page(writer: &mut BufWriter<File>, offset: usize) -> Result<usize, Error> {
    let page = page_size::get();
    let used_in_page = offset % page;
    let padding = page - used_in_page;
    eprintln!("Padding from: {} with: {}", offset, padding);

    let skip: i64 = padding.try_into()?;
    writer.seek(SeekFrom::Current(skip))?;

    Ok(padding)
}

@ -1,12 +1,7 @@
use std::{collections::HashMap, iter::once, rc::Rc};
use std::iter::once;
use crate::common::{Relocatable, Section, SectionInfo, Symbol};
use crate::error::Error;
use crate::{
common::{Relocatable, Section, SectionInfo, Symbol},
error::{LinkError, Trace, LE_GLOBAL_SYMBOL_DUPLICATE},
};
use super::Binding;
pub enum SegmentType {
Text,
@ -35,17 +30,15 @@ impl SegmentSections {
}
}
// TODO: use attributes for field section names, indexes etc.
#[derive(Default)]
pub struct Loadable {
pub start_offset: Option<u64>,
start_offset: Option<u64>,
text: SegmentSections,
rodata: SegmentSections,
data: SegmentSections,
bss: SegmentSections,
symbol_map: HashMap<Rc<str>, Symbol>,
// symbol_map: HashMap<Rc<str>, Symbol>,
}
impl Loadable {
@ -60,7 +53,7 @@ impl Loadable {
Ok(())
}
pub fn process_symbol<R>(&mut self, symbol: Symbol, objects: &[R]) -> Result<(), Error>
pub fn process_symbol<R>(&mut self, _symbol: Symbol, _objects: &[R]) -> Result<(), Error>
where
R: Relocatable,
{
@ -120,13 +113,13 @@ impl Loadable {
let text_iter = self.text.iter();
let rodata_iter = self.rodata.iter();
let data1 = text_iter.filter_map(move |si| match si.data_index {
None => None,
Some(di) => Some(objects[di.object_index].section_data(di)),
let data1 = text_iter.filter_map(move |si| match si.file_size {
0 => None,
_ => Some(objects[si.object_index].bytes(si.offset, si.file_size)),
});
let data2 = rodata_iter.filter_map(move |si| match si.data_index {
None => None,
Some(di) => Some(objects[di.object_index].section_data(di)),
let data2 = rodata_iter.filter_map(move |si| match si.data_size {
0 => None,
_ => Some(objects[si.object_index].bytes(si.offset, si.file_size)),
});
data1.chain(data2)
@ -136,9 +129,9 @@ impl Loadable {
&'l self,
objects: &'l [R],
) -> impl Iterator<Item = Result<&'l [u8], Error>> {
let iter = self.data.iter().filter_map(move |si| match si.data_index {
None => None,
Some(di) => Some(objects[di.object_index].section_data(di)),
let iter = self.data.iter().filter_map(move |si| match si.file_size {
0 => None,
_ => Some(objects[si.object_index].bytes(si.offset, si.file_size)),
});
iter
@ -176,4 +169,21 @@ impl Loadable {
result
}
/// Stores `offset` as the start offset, replacing any previous value.
pub fn set_start_offset(&mut self, offset: u64) {
    self.start_offset = Option::from(offset);
}
pub fn start_offset(&self) -> Result<u64, Error> {
use crate::error::LinkError;
self.start_offset.ok_or_else(|| {
let link_error = LinkError {
message: "Program entrypoint not found".into(),
..Default::default()
};
Error::LinkingError(link_error)
})
}
}

@ -5,29 +5,6 @@ use crate::error::Error;
use super::{Section, Symbol};
/// Index into a section of a given object stored in the linker.
#[derive(Clone, Copy)]
pub struct DataIndex {
    /// Position of the object.
    pub object_index: usize,
    /// Position of the section within that object.
    pub section_index: usize,
}

impl DataIndex {
    /// Builds an index from an object position and a section position.
    pub fn new(object_index: usize, section_index: usize) -> Self {
        Self {
            object_index,
            section_index,
        }
    }

    /// Builds an index for `object_index` with the section position set to 0.
    pub fn for_object(object_index: usize) -> Self {
        Self::new(object_index, 0)
    }
}
pub type SectionIterBox<'iter> = Box<dyn Iterator<Item = Result<Section, Error>> + 'iter>;
pub type SymbolIterBox<'iter> = Box<dyn Iterator<Item = Symbol> + 'iter>;
@ -36,13 +13,13 @@ pub type SymbolIterBox<'iter> = Box<dyn Iterator<Item = Symbol> + 'iter>;
/// mushed and relocated executable from an object file
pub trait Relocatable: Display + Sized {
// + TryFrom<PathBuf, Error = Error> {
fn new(origin: PathBuf, index: DataIndex) -> Result<Self, Error>;
fn new(origin: PathBuf, object_index: usize) -> Result<Self, Error>;
fn origin(&self) -> &Path; // not same as section's path since this one's supposed to be cannonical
fn sections(&self) -> SectionIterBox;
fn section_data(&self, section_index: DataIndex) -> Result<&[u8], Error>;
fn symbols(&self) -> SymbolIterBox;
fn bytes(&self, offset: u64, size: u64) -> Result<&[u8], Error>;
}

@ -4,13 +4,13 @@ use std::{
fmt::{Display, Formatter},
};
use super::DataIndex;
use super::Relocatable;
#[derive(Clone)]
pub struct SectionInfo {
pub object_index: usize,
pub file_size: u64,
pub data_size: u64,
pub data_index: Option<DataIndex>, // some form of indexing to the source data (Relocatable's data)
pub offset: u64,
}
@ -21,6 +21,39 @@ pub enum Section {
Bss(SectionInfo),
}
impl Section {
pub fn data<'o, R>(&self, object: &'o R) -> Result<&'o [u8], Error>
where
R: Relocatable,
{
object.bytes(self.offset(), self.file_size())
}
pub fn object_index(&self) -> usize {
self.info().object_index
}
pub fn offset(&self) -> u64 {
self.info().offset
}
pub fn data_size(&self) -> u64 {
self.info().data_size
}
pub fn file_size(&self) -> u64 {
self.info().file_size
}
fn info(&self) -> &SectionInfo {
match self {
Section::Text(si) => si,
Section::Data(si, _) => si,
Section::Bss(si) => si,
}
}
}
impl<'data> TryFrom<(&str, SectionInfo)> for Section {
type Error = Error;
@ -46,7 +79,7 @@ impl Display for SectionInfo {
"file_size: {}, data_size: {}, data: {}, offset: {}",
self.file_size,
self.data_size,
self.data_index.is_some(),
self.data_size > 0,
self.offset,
)
}

@ -3,15 +3,17 @@ use std::{
path::PathBuf,
};
pub const LE_GLOBAL_SYMBOL_DUPLICATE: u32 = 701;
// pub const LE_GLOBAL_SYMBOL_DUPLICATE: u32 = 10001;
#[derive(Debug)]
pub enum Error {
IOError(Box<dyn std::error::Error>),
DataError(PathBuf), // generic "something wrong with data" error, not specific to parsing
InvalidObjectType(u32),
InvalidSectionName,
InvalidSectionData,
ParseError(Box<dyn std::error::Error>),
InvalidInput, // input is completely wrong (e.g. no magic bytes etc.)
ParseError(Box<dyn std::error::Error>), // error on parsing input
TryFromIntError,
LinkingError(LinkError),
}
@ -26,7 +28,7 @@ pub struct Trace {
// "backtrace" for error origin
#[derive(Debug, Default)]
pub struct LinkError {
pub code: u32, // specific trace code, should be >= 700
pub code: u32, // specific trace code, should be >= 10000
pub message: String,
pub traces: Vec<Trace>,
}
@ -36,11 +38,13 @@ impl Error {
pub fn code(&self) -> u32 {
match self {
Error::IOError(_) => 100,
Error::DataError(_) => 800,
Error::InvalidObjectType(_) => 200,
Error::InvalidSectionName => 300,
Error::InvalidSectionData => 400,
Error::ParseError(_) => 500,
Error::TryFromIntError => 600,
Error::InvalidInput => 700,
Error::LinkingError(le) => le.code,
}
}
@ -70,11 +74,13 @@ impl Display for Error {
match self {
Error::IOError(err) => write!(f, "IO error: {}", err),
Error::DataError(origin) => write!(f, "Data error in {:?}", origin),
Error::InvalidObjectType(ot) => write!(f, "Invalid object type: {}", ot),
Error::InvalidSectionName => write!(f, "Invalid section name"),
Error::InvalidSectionData => write!(f, "Invalid section data"),
Error::ParseError(err) => write!(f, "Parse error: {}", err),
Error::TryFromIntError => write!(f, "Integer conversion error"),
Error::InvalidInput => write!(f, "Input object invalid"),
Error::LinkingError(trace) => write!(f, "Linking error: {}", trace),
}
}

@ -1,22 +1,24 @@
use std::{
convert::TryFrom,
fmt::Display,
fs::File,
path::{Path, PathBuf},
};
use crate::common::{Binding, DataIndex, Relocatable, Section, SectionInfo, Symbol, SymbolIterBox};
use crate::common::{Binding, Relocatable, Section, SectionInfo, Symbol, SymbolIterBox};
use crate::{common::SectionIterBox, error::Error};
use elf_utilities::{
file::ELF64,
parser::read_elf64,
section::{Contents64, Type},
header::Ehdr64,
section::Shdr64,
symbol::{Bind, Symbol64},
};
use memmap::Mmap;
pub struct ElfObject {
object_index: usize,
origin: PathBuf,
elf: ELF64,
data: Mmap,
ehdr: Ehdr64,
sh_name_offset: usize,
}
impl ElfObject {
@ -25,21 +27,67 @@ impl ElfObject {
.as_path()
.to_str()
.ok_or(Error::InvalidObjectType(404))?;
let elf = match read_elf64(str_origin) {
Ok(val) => val,
Err(err) => return Err(Error::ParseError(err)),
};
is_relocatable(&elf)?;
let file = File::open(str_origin)?;
let data = unsafe { Mmap::map(&file)? };
let ehdr = parse_elf_header(&data)?;
let shstrtab = parse_shstrtab(&data, &ehdr)?;
let result = ElfObject {
object_index,
origin,
elf,
data,
ehdr,
sh_name_offset: shstrtab.sh_offset as usize,
};
Ok(result)
}
fn section_name(&self, shdr: &Shdr64) -> Result<&str, Error> {
let idx: usize = shdr.sh_name as usize;
let start = self.sh_name_offset + idx;
let mut i = start;
while self.data[i] != 0 {
i += 1;
// sanity check
if i - start > 1024 {
return Err(Error::DataError(self.origin.clone()));
}
}
// TODO: consider unchecked?
std::str::from_utf8(&self.data[start..i]).map_err(|err| Error::ParseError(err.into()))
}
fn make_section(&self, offset: usize, sh: &Shdr64) -> Option<Result<Section, Error>> {
let name = match self.section_name(sh) {
Ok(n) => n,
Err(err) => return Some(Err(err)),
};
let mut si = SectionInfo {
object_index: self.object_index,
file_size: sh.sh_size,
data_size: sh.sh_size,
offset: offset as u64,
};
if name.starts_with(".bss") {
si.file_size = 0;
Some(Ok(Section::Bss(si)))
} else if name.starts_with(".text") {
Some(Ok(Section::Text(si)))
} else if name.starts_with(".rodata") {
Some(Ok(Section::Data(si, true)))
} else if name.starts_with(".data") {
Some(Ok(Section::Data(si, false)))
} else {
None
}
}
fn make_symbol(&self, s64: &Symbol64, strtab: &[u8]) -> Symbol {
let binding = match s64.get_bind() {
Bind::Global => Binding::Global,
@ -59,8 +107,8 @@ impl ElfObject {
}
impl Relocatable for ElfObject {
fn new(origin: PathBuf, di: DataIndex) -> Result<Self, Error> {
ElfObject::new(origin, di.object_index)
fn new(origin: PathBuf, object_index: usize) -> Result<Self, Error> {
ElfObject::new(origin, object_index)
}
fn origin(&self) -> &Path {
@ -68,92 +116,62 @@ impl Relocatable for ElfObject {
}
fn sections(&self) -> SectionIterBox {
let iter = self
.elf
.sections
.iter()
.enumerate()
.filter_map(move |(i, s)| match s.header.get_type() {
Type::ProgBits => {
if s.header.sh_size > 0 {
if match &s.contents {
Contents64::Raw(v) => Some(v),
_ => None,
}
.is_some()
{
let si = SectionInfo {
file_size: s.header.sh_size,
data_size: s.header.sh_size,
data_index: Some(DataIndex::new(self.object_index, i)),
offset: s.header.sh_offset,
};
let s_name: &str = &s.name;
match Section::try_from((s_name, si)) {
Ok(s) => Some(Ok(s)),
Err(Error::InvalidSectionName) => None, // skip
Err(err) => Some(Err(err)),
}
} else {
Some(Err(Error::InvalidSectionData))
}
} else {
None
}
}
Type::NoBits => Some(Ok(Section::Bss(SectionInfo {
file_size: 0,
data_size: s.header.sh_size,
data_index: None,
offset: s.header.sh_offset,
}))),
_ => None,
});
let iter = (0..self.ehdr.e_shnum).into_iter().filter_map(move |i| {
let i_usize = usize::from(i);
let sh_start = self.ehdr.e_shoff as usize;
let sh_size = usize::from(self.ehdr.e_shentsize);
let offset: usize = sh_start + i_usize * sh_size;
let shr: Result<Shdr64, Error> =
bincode::deserialize(&self.data[offset..offset + sh_size])
.map_err(|err| Error::ParseError(err));
match shr {
Err(err) => Some(Err(err)),
Ok(sh) => self.make_section(offset, &sh),
}
});
Box::new(iter)
}
fn section_data(&self, index: DataIndex) -> Result<&[u8], Error> {
let section = &self.elf.sections[index.section_index];
match &section.contents {
Contents64::Raw(v) => Ok(&v),
_ => Err(Error::InvalidSectionData),
}
fn bytes(&self, offset: u64, size: u64) -> Result<&[u8], Error> {
let o = offset as usize;
let s = size as usize;
Ok(&self.data[o..o + s])
}
fn symbols(&self) -> SymbolIterBox {
if let Some(strtab_section) = self
.elf
.first_section_by(|s64| s64.header.get_type() == Type::StrTab && s64.name == ".strtab")
{
let strtab = match &strtab_section.contents {
Contents64::Raw(bytes) => bytes,
_ => panic!("Unexpected strtab content type"),
};
let iter = self
.elf
.sections
.iter()
.filter_map(move |s| match &s.contents {
Contents64::Symbols(symbols) => {
Some(symbols.iter().filter_map(move |sym| match sym.get_bind() {
Bind::Global | Bind::Local | Bind::Weak => {
Some(self.make_symbol(sym, strtab))
}
_ => None,
}))
}
_ => None,
})
.flatten();
Box::new(iter)
} else {
Box::new(std::iter::empty())
}
// if let Some(strtab_section) = self
// .elf
// .first_section_by(|s64| s64.header.get_type() == Type::StrTab && s64.name == ".strtab")
// {
// let strtab = match &strtab_section.contents {
// Contents64::Raw(bytes) => bytes,
// _ => panic!("Unexpected strtab content type"),
// };
// let iter = self
// .elf
// .sections
// .iter()
// .filter_map(move |s| match &s.contents {
// Contents64::Symbols(symbols) => {
// Some(symbols.iter().filter_map(move |sym| match sym.get_bind() {
// Bind::Global | Bind::Local | Bind::Weak => {
// Some(self.make_symbol(sym, strtab))
// }
// _ => None,
// }))
// }
// _ => None,
// })
// .flatten();
// Box::new(iter)
// } else {
Box::new(std::iter::empty())
//}
}
}
@ -188,12 +206,27 @@ fn parse_strtab_name(strtab: &[u8], idx: u32) -> String {
.to_string()
}
fn is_relocatable(elf: &ELF64) -> Result<(), Error> {
fn parse_elf_header(data: &Mmap) -> Result<Ehdr64, Error> {
use elf_utilities::header::Type;
if elf.ehdr.get_type() != Type::Rel {
return Err(Error::InvalidObjectType(elf.ehdr.e_type.into()));
let magic = &data[0..4];
if magic != &[0x7f, 0x45, 0x4c, 0x46] {
return Err(Error::InvalidInput);
}
let ehdr = Ehdr64::deserialize(data, 0).map_err(|err| Error::ParseError(err))?;
if ehdr.get_type() != Type::Rel {
return Err(Error::InvalidObjectType(u32::from(ehdr.e_type)));
}
Ok(())
Ok(ehdr)
}
fn parse_shstrtab(data: &Mmap, ehdr: &Ehdr64) -> Result<Shdr64, Error> {
let idx: usize = ehdr.e_shstrndx.into();
let sh_start: usize = ehdr.e_shoff as usize;
let sh_size: usize = ehdr.e_shentsize.into();
let offset: usize = sh_start + sh_size * idx;
bincode::deserialize(&data[offset..offset + sh_size]).map_err(|err| Error::ParseError(err))
}

@ -1,10 +1,9 @@
use std::{
convert::TryInto,
fs::File,
io::BufWriter,
io::{Seek, SeekFrom, Write},
io::{Write},
mem::size_of,
path::{Path, PathBuf},
path::{PathBuf},
};
use elf_utilities::{
@ -14,7 +13,7 @@ use elf_utilities::{
};
use crate::{
common::{Loadable, Output, SegmentType},
common::{expand_path, pad_to_next_page, Loadable, Output, SegmentType},
error::Error,
};
@ -50,7 +49,7 @@ impl Output<ElfObject> for ElfOutput {
ehdr.e_shnum = Loadable::section_count() as u16 + 1; // +1 for .shstrtab
ehdr.e_phnum = Loadable::segment_count() as u16;
ehdr.e_shstrndx = ehdr.e_shnum - 1; // .shstrab is always last
ehdr.e_entry = get_start_offset(loadable)?;
ehdr.e_entry = loadable.start_offset()?;
ehdr.e_phentsize = PHS;
ehdr.e_shentsize = SHS;
ehdr.e_phoff = EHS;
@ -214,38 +213,3 @@ fn make_strtab() -> (Vec<u8>, Vec<usize>) {
(strtab_bytes, indexes)
}
fn get_start_offset(loadable: &Loadable) -> Result<u64, Error> {
use crate::error::LinkError;
loadable.start_offset.ok_or_else(|| {
let link_error = LinkError {
message: "Program entrypoint not found".into(),
..Default::default()
};
Error::LinkingError(link_error)
})
}
fn expand_path(path: &Path) -> Result<&str, Error> {
use std::io::Error as IOError;
use std::io::ErrorKind;
path.to_str().ok_or_else(|| {
let ioe = IOError::new(ErrorKind::Other, "Path expansion fail");
let boxed = Box::new(ioe);
Error::IOError(boxed)
})
}
/// Moves `writer` forward, relative to its current position, by the distance
/// from `offset` to the next multiple of the page size (`page_size::get()`).
///
/// An `offset` that is already a multiple of the page size yields a full
/// page of padding. Returns the number of bytes skipped.
///
/// # Errors
///
/// Fails when the padding does not convert to the seek offset type or when
/// seeking fails.
fn pad_to_next_page(writer: &mut BufWriter<File>, offset: usize) -> Result<usize, Error> {
    let page_len = page_size::get();
    let remainder = offset % page_len;
    let padding = page_len - remainder;
    eprintln!("Padding from: {} with: {}", offset, padding);

    let delta: i64 = padding.try_into()?;
    writer.seek(SeekFrom::Current(delta))?;

    Ok(padding)
}

@ -1,7 +1,7 @@
use std::{fmt::Display, path::PathBuf};
use crate::{
common::{DataIndex, Loadable, Output, Relocatable},
common::{Loadable, Output, Relocatable},
error::Error,
};
@ -29,7 +29,7 @@ where
}
pub fn add_relocatable(&mut self, origin: PathBuf) -> Result<(), Error> {
let r = R::new(origin, DataIndex::for_object(self.relocatables.len()))?;
let r = R::new(origin, self.relocatables.len())?;
// TODO: parallelize?
for section in r.sections() {
@ -44,7 +44,7 @@ where
pub fn link(mut self) -> Result<PathBuf, Error> {
self.process_symbols()?;
self.loadable.start_offset = Some(4096); // TODO: get from .start symbol location
self.loadable.set_start_offset(4096); // TODO: get from .start symbol location
self.output.finalize(&self.relocatables, &self.loadable)
}

Loading…
Cancel
Save