From 6f4b43101f7b46e38c2f61c2f859347085d8214f Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 1 Jul 2023 18:26:50 +1200 Subject: [PATCH 01/10] A `git ls-tree` example - Implements some of the functionality provided by `git ls-tree`. - Given the additional parsing of arguments, copy what libgit2 did with `structopt` and add an argument-parsing crate (`clap`) as an additional dev-dependency. --- Cargo.lock | 1 + gix/Cargo.toml | 1 + gix/examples/ls-tree.rs | 116 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 gix/examples/ls-tree.rs diff --git a/Cargo.lock b/Cargo.lock index b27a78c22bf..f4bb4b9d378 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1272,6 +1272,7 @@ version = "0.48.0" dependencies = [ "anyhow", "async-std", + "clap", "document-features", "gix-actor 0.23.0", "gix-attributes 0.14.1", diff --git a/gix/Cargo.toml b/gix/Cargo.toml index fa46c2cad19..8a8a32ab759 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -183,6 +183,7 @@ is_ci = "1.1.1" anyhow = "1" walkdir = "2.3.2" serial_test = { version = "2.0.0", default-features = false } +clap = { version = "4.1.1", features = ["derive"] } async-std = { version = "1.12.0", features = ["attributes"] } [package.metadata.docs.rs] diff --git a/gix/examples/ls-tree.rs b/gix/examples/ls-tree.rs new file mode 100644 index 00000000000..4dd00a73f26 --- /dev/null +++ b/gix/examples/ls-tree.rs @@ -0,0 +1,116 @@ +use clap::Parser; +use gix::{ + ObjectId, + object::Kind, + objs::tree::EntryMode, + objs::tree::EntryMode::Tree, + traverse::tree::Recorder, +}; + +fn main() { + let args = Args::parse_from(gix::env::args_os()); + match run(&args) { + Ok(()) => {} + Err(e) => eprintln!("error: {e}"), + } +} + +#[derive(Debug, clap::Parser)] +#[clap(name = "ls-tree", about = "git ls-tree example", version = option_env!("GITOXIDE_VERSION"))] +#[clap(arg_required_else_help = true)] +struct Args { + #[clap(short = 'r')] + /// Recurse into subtrees + recursive: bool, + #[clap(short = 'd')] + /// Only show trees + 
tree_only: bool, + #[clap(short = 't')] + /// Show trees when recursing + tree_recursing: bool, + #[clap(name = "tree-ish")] + /// A revspec pointing to a tree-ish object, e.g. 'HEAD', 'HEAD:src/' + treeish: String, +} + +fn run(args: &Args) -> anyhow::Result<()> { + let repo = gix::discover(".")?; + let rev_spec = repo.rev_parse_single(&*args.treeish)?; + let object = rev_spec.object()?; + let tree = match object.kind { + Kind::Commit => object.try_into_commit()?.tree()?, + Kind::Tree => object.try_into_tree()?, + _ => anyhow::bail!("not a tree-ish object"), + }; + // Would like to take the entry arguments directly, but now there is + // no common trait implementing common field assessors for that. + let entries = if args.recursive { + let mut recorder = Recorder::default(); + tree.traverse() + .breadthfirst(&mut recorder)?; + recorder.records.iter() + .filter(|entry| args.tree_recursing + || args.tree_only + || entry.mode != Tree + ) + .filter(|entry| !args.tree_only || (entry.mode == Tree)) + .map(|entry| Entry::new( + entry.mode, + entry.oid, + entry.filepath.to_string(), + )) + .collect::>() + } + else { + tree.iter() + .filter_map(std::result::Result::ok) // dropping errors silently + .filter(|entry| !args.tree_only || (entry.mode() == Tree)) + .map(|entry| Entry::new( + entry.inner.mode, + entry.id().detach(), + entry.inner.filename.to_string(), + )) + .collect::>() + }; + + for entry in entries { + println!("{entry}"); + } + + Ok(()) +} + +// Helper struct and impl to facilitate displaying as per `git ls-tree`. 
+use std::fmt::{ + Display, + Formatter, +}; + +struct Entry { + kind: EntryMode, + hash: ObjectId, + path: String, +} + +impl Entry { + fn new(kind: EntryMode, hash: ObjectId, path: String) -> Self { + Self { + kind, + hash, + path, + } + } +} + +impl Display for Entry { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{:06o} {:4} {} {}", + self.kind as u16, + self.kind.as_str().to_string(), + self.hash, + self.path, + ) + } +} From 03b342306c5effac5e8aa92a349385e59785c0b7 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 2 Jul 2023 19:02:47 +1200 Subject: [PATCH 02/10] A `git log` example - A rather naive first cut. --- gix/examples/log.rs | 108 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 gix/examples/log.rs diff --git a/gix/examples/log.rs b/gix/examples/log.rs new file mode 100644 index 00000000000..8e207e426e1 --- /dev/null +++ b/gix/examples/log.rs @@ -0,0 +1,108 @@ +use clap::Parser; +use gix::{ + date::time::format, + object::Kind, + objs::CommitRef, + traverse::commit::Sorting, +}; + +fn main() { + let args = Args::parse_from(gix::env::args_os()); + match run(&args) { + Ok(()) => {} + Err(e) => eprintln!("error: {e}"), + } +} + +#[derive(Debug, clap::Parser)] +#[clap(name = "log", about = "git log example", version = option_env!("GITOXIDE_VERSION"))] +struct Args { + #[clap(name = "dir", long = "git-dir")] + /// Alternative git directory to use + git_dir: Option, + #[clap(name = "count", short = 'c', long = "count")] + /// Number of commits to return + count: Option, + #[clap(name = "commit")] + /// The starting commit + commitish: Option, + #[clap(name = "path")] + /// The path interested in log history of + path: Option, +} + +fn run(args: &Args) -> anyhow::Result<()> { + let repo = gix::discover( + args.git_dir.as_ref().map_or(".", |s| &s[..]) + )?; + let object = repo.rev_parse_single( + args.commitish.as_ref().map_or("HEAD", |s| &s[..]) + )?.object()?; + let commit = 
match object.kind { + Kind::Commit => object.try_into_commit()?, + _ => anyhow::bail!("not a commit object"), + }; + + let log_entry_iter = repo + .rev_walk([commit.id]) + .sorting(Sorting::ByCommitTimeNewestFirst) + .all()? + .filter(|info| info.as_ref() + .map_or(true, |info| args.path.as_ref().map_or(true, |path| { + // TODO should make use of the `git2::DiffOptions` + // counterpart in gix for a set of files and also to + // generate diffs. + let oid = repo.rev_parse_single( + format!("{}:{}", info.id, path).as_str() + ).ok(); + !info.parent_ids + .iter() + .all(|id| repo.rev_parse_single( + format!("{id}:{path}").as_str() + ).ok() == oid) + })) + ) + .map(|info| { + let commit = info?.object()?; + let commit_ref = CommitRef::from_bytes(&commit.data)?; + let committer = commit_ref.committer; + Ok(LogEntryInfo { + commit_id: format!("{}", commit.id()), + author: format!("{} <{}>", + commit_ref.author.name, commit_ref.author.email), + committer: format!("{} <{}>", + commit_ref.committer.name, commit_ref.committer.email), + commit_time: committer.time.format(format::DEFAULT), + message: commit_ref.message.to_string(), + }) + }); + + // Collect all items into a Vec to be lazy in code writing + let log_entries = match args.count { + Some(count) => log_entry_iter + .take(count) + .collect::>>()?, + None => log_entry_iter + .collect::>>()?, + }; + + for entry in log_entries { + println!("commit {}", entry.commit_id); + println!("Author: {}", entry.committer); + println!("Date: {}\n", entry.commit_time); + for line in entry.message.lines() { + println!(" {line}"); + } + println!(); + } + + Ok(()) +} + +pub struct LogEntryInfo { + pub commit_id: String, + pub author: String, + pub committer: String, + pub commit_time: String, + pub message: String, +} From 5cbb6a72c34d926a2782569d8370e54d4c63ab34 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 2 Jul 2023 19:42:04 +1200 Subject: [PATCH 03/10] `git log` example now shows merge parents - Also, output updated to be 
equivalent to the one produced by running `git log --full-history `. - Shouldn't have mixed author/commit time (it was always author time being displayed by default). --- gix/examples/log.rs | 82 ++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/gix/examples/log.rs b/gix/examples/log.rs index 8e207e426e1..4f2edbeaf8f 100644 --- a/gix/examples/log.rs +++ b/gix/examples/log.rs @@ -20,9 +20,21 @@ struct Args { #[clap(name = "dir", long = "git-dir")] /// Alternative git directory to use git_dir: Option, - #[clap(name = "count", short = 'c', long = "count")] + #[clap(short, long)] /// Number of commits to return count: Option, + #[clap(short, long)] + /// Number of commits to skip + skip: Option, + #[clap(short, long)] + /// Commits are sorted as they are mentioned in the commit graph. + breadth_first: bool, + #[clap(short, long)] + /// Commits are sorted by their commit time in descending order. + newest_first: bool, + #[clap(short, long)] + /// Reverse the commit sort order + reverse: bool, #[clap(name = "commit")] /// The starting commit commitish: Option, @@ -43,9 +55,17 @@ fn run(args: &Args) -> anyhow::Result<()> { _ => anyhow::bail!("not a commit object"), }; - let log_entry_iter = repo + // TODO better way to deal with these flags. + let sorting = if args.breadth_first { + Sorting::BreadthFirst + } + else { // else if args.newest_first { + Sorting::ByCommitTimeNewestFirst + }; + + let mut log_entries = repo .rev_walk([commit.id]) - .sorting(Sorting::ByCommitTimeNewestFirst) + .sorting(sorting) .all()? 
.filter(|info| info.as_ref() .map_or(true, |info| args.path.as_ref().map_or(true, |path| { @@ -63,37 +83,53 @@ fn run(args: &Args) -> anyhow::Result<()> { })) ) .map(|info| { - let commit = info?.object()?; + let info = info?; + let commit = info.object()?; let commit_ref = CommitRef::from_bytes(&commit.data)?; - let committer = commit_ref.committer; Ok(LogEntryInfo { commit_id: format!("{}", commit.id()), + parents: info.parent_ids.iter() + // probably could have a better way to display this + .map(|x| x.to_string()[..7].to_string()) + .collect(), author: format!("{} <{}>", commit_ref.author.name, commit_ref.author.email), - committer: format!("{} <{}>", - commit_ref.committer.name, commit_ref.committer.email), - commit_time: committer.time.format(format::DEFAULT), + time: commit_ref.author.time.format(format::DEFAULT), message: commit_ref.message.to_string(), }) - }); + }) + // The more memory efficient way is to only collect if reverse; + // this example is lazy coding rather than lazy execution... + .collect::>>()?; - // Collect all items into a Vec to be lazy in code writing - let log_entries = match args.count { - Some(count) => log_entry_iter - .take(count) - .collect::>>()?, - None => log_entry_iter - .collect::>>()?, - }; + args.reverse.then(|| log_entries.reverse()); + + // ... as ultimately we will have an iterator here again... 
+ let mut log_iter: Box> = Box::new( + log_entries.iter()); - for entry in log_entries { + if let Some(n) = args.skip { + log_iter = Box::new(log_iter.skip(n)); + } + if let Some(n) = args.count { + log_iter = Box::new(log_iter.take(n)); + } + + let mut log_iter = log_iter.peekable(); + + while let Some(entry) = log_iter.next() { println!("commit {}", entry.commit_id); - println!("Author: {}", entry.committer); - println!("Date: {}\n", entry.commit_time); + if entry.parents.len() > 1 { + println!("Merge: {}", entry.parents.join(" ")); + } + println!("Author: {}", entry.author); + println!("Date: {}\n", entry.time); for line in entry.message.lines() { println!(" {line}"); } - println!(); + if log_iter.peek().is_some() { + println!(); + } } Ok(()) @@ -101,8 +137,8 @@ fn run(args: &Args) -> anyhow::Result<()> { pub struct LogEntryInfo { pub commit_id: String, + pub parents: Vec, pub author: String, - pub committer: String, - pub commit_time: String, + pub time: String, pub message: String, } From 8a6f1e89fa4d736a2c902be55413887e14885957 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 16 Jul 2023 16:17:05 +1200 Subject: [PATCH 04/10] `git log` example iterator now properly lazy - Unless reverse is required - the git log DAG is unidirectional, the whole graph must be read in order for it to be reversed - libgit2 does this internally when the `GIT_SORT_REVERSE` is specified. See: https://github.com/libgit2/libgit2/blob/v1.6.4/src/libgit2/revwalk.c#L659-L669 - Include some comments that might hopefully help future readers. 
--- gix/examples/log.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/gix/examples/log.rs b/gix/examples/log.rs index 4f2edbeaf8f..ff4c3dd5ebd 100644 --- a/gix/examples/log.rs +++ b/gix/examples/log.rs @@ -63,18 +63,25 @@ fn run(args: &Args) -> anyhow::Result<()> { Sorting::ByCommitTimeNewestFirst }; - let mut log_entries = repo + let mut log_iter: Box>> = Box::new(repo .rev_walk([commit.id]) .sorting(sorting) .all()? .filter(|info| info.as_ref() + // TODO the other implementation can take a sequence of + // paths - if so it should apply this check for all paths. .map_or(true, |info| args.path.as_ref().map_or(true, |path| { // TODO should make use of the `git2::DiffOptions` // counterpart in gix for a set of files and also to // generate diffs. + // should args.path be provided, check that it is in + // fact relevant for this commit (it present?) let oid = repo.rev_parse_single( format!("{}:{}", info.id, path).as_str() ).ok(); + // check via the revspec on the path prefixed by the + // tree of the current commit vs. commit's every parents + // and see if all matching, if not, include this entry. !info.parent_ids .iter() .all(|id| repo.rev_parse_single( @@ -86,7 +93,7 @@ fn run(args: &Args) -> anyhow::Result<()> { let info = info?; let commit = info.object()?; let commit_ref = CommitRef::from_bytes(&commit.data)?; - Ok(LogEntryInfo { + Ok::<_, anyhow::Error>(LogEntryInfo { commit_id: format!("{}", commit.id()), parents: info.parent_ids.iter() // probably could have a better way to display this @@ -98,26 +105,22 @@ fn run(args: &Args) -> anyhow::Result<()> { message: commit_ref.message.to_string(), }) }) - // The more memory efficient way is to only collect if reverse; - // this example is lazy coding rather than lazy execution... - .collect::>>()?; - - args.reverse.then(|| log_entries.reverse()); - - // ... as ultimately we will have an iterator here again... 
- let mut log_iter: Box> = Box::new( - log_entries.iter()); - + ); + if args.reverse { + let mut results = log_iter.collect::>(); + results.reverse(); + log_iter = Box::new(results.into_iter()) + } if let Some(n) = args.skip { log_iter = Box::new(log_iter.skip(n)); } if let Some(n) = args.count { log_iter = Box::new(log_iter.take(n)); } - let mut log_iter = log_iter.peekable(); while let Some(entry) = log_iter.next() { + let entry = entry?; println!("commit {}", entry.commit_id); if entry.parents.len() > 1 { println!("Merge: {}", entry.parents.join(" ")); @@ -127,6 +130,7 @@ fn run(args: &Args) -> anyhow::Result<()> { for line in entry.message.lines() { println!(" {line}"); } + // only include newline if more log entries, mimicking `git log` if log_iter.peek().is_some() { println!(); } From 01e9c29bf7106b30d8e3e8c71b37eff77bcc38b5 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 16 Jul 2023 17:33:05 +1200 Subject: [PATCH 05/10] `git log` example filter for min/max parents - This set of filters will report (non)merge commits by filtering out commits with min/max number of parents - Also refactored the path filter logic and restructure the layout of the code block plus add in comments to make fit as an example. --- gix/examples/log.rs | 74 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/gix/examples/log.rs b/gix/examples/log.rs index ff4c3dd5ebd..1df7eb80fe9 100644 --- a/gix/examples/log.rs +++ b/gix/examples/log.rs @@ -32,6 +32,18 @@ struct Args { #[clap(short, long)] /// Commits are sorted by their commit time in descending order. 
newest_first: bool, + #[clap(long)] + /// Show commits with the specified minimum number of parents + min_parents: Option, + #[clap(long)] + /// Show commits with the specified maximum number of parents + max_parents: Option, + #[clap(long)] + /// Show only merge commits (implies --min-parents=2) + merges: bool, + #[clap(long)] + /// Show only non-merge commits (implies --max-parents=1) + no_merges: bool, #[clap(short, long)] /// Reverse the commit sort order reverse: bool, @@ -63,6 +75,15 @@ fn run(args: &Args) -> anyhow::Result<()> { Sorting::ByCommitTimeNewestFirst }; + let mut min_parents = args.min_parents.unwrap_or(0); + let mut max_parents = args.max_parents.unwrap_or(usize::MAX); + if args.merges { + min_parents = 2; + } + if args.no_merges { + max_parents = 1; + } + let mut log_iter: Box>> = Box::new(repo .rev_walk([commit.id]) .sorting(sorting) @@ -70,29 +91,44 @@ fn run(args: &Args) -> anyhow::Result<()> { .filter(|info| info.as_ref() // TODO the other implementation can take a sequence of // paths - if so it should apply this check for all paths. - .map_or(true, |info| args.path.as_ref().map_or(true, |path| { - // TODO should make use of the `git2::DiffOptions` - // counterpart in gix for a set of files and also to - // generate diffs. - // should args.path be provided, check that it is in - // fact relevant for this commit (it present?) - let oid = repo.rev_parse_single( - format!("{}:{}", info.id, path).as_str() - ).ok(); - // check via the revspec on the path prefixed by the - // tree of the current commit vs. commit's every parents - // and see if all matching, if not, include this entry. 
- !info.parent_ids - .iter() - .all(|id| repo.rev_parse_single( - format!("{id}:{path}").as_str() - ).ok() == oid) - })) + .map_or(true, |info| { + info.parent_ids.len() <= max_parents && + info.parent_ids.len() >= min_parents && + args.path.as_ref().map_or(true, |path| { + // should args.path be provided, check that it is in + // fact relevant for this commit (it present?) + // TODO should make use of the `git2::DiffOptions` + // counterpart in gix for a set of files and also to + // generate diffs. + match repo.rev_parse_single( + format!("{}:{}", info.id, path).as_str() + ) { + // check by parsing the revspec on the path with + // the prefix of the tree of the current commit, + // vs. the same counterpart but using each of + // commit's parents; if any pairs don't match, + // this indicates this path was changed in this + // commit thus should be included in output. + Ok(oid) => info.parent_ids + .iter() + .any(|id| { + repo.rev_parse_single( + format!("{id}:{path}").as_str() + ).ok() != Some(oid) + }), + // no oid for the path resolved with this commit + // so this commit can be omitted from output. + Err(_) => false, + } + }) + }) ) .map(|info| { let info = info?; let commit = info.object()?; let commit_ref = CommitRef::from_bytes(&commit.data)?; + // type specifier using turbofish for the OK type here is + // because this isn't being collected... Ok::<_, anyhow::Error>(LogEntryInfo { commit_id: format!("{}", commit.id()), parents: info.parent_ids.iter() @@ -117,6 +153,8 @@ fn run(args: &Args) -> anyhow::Result<()> { if let Some(n) = args.count { log_iter = Box::new(log_iter.take(n)); } + // ... if the goal is to have the results as a `Vec`: + // let results = log_iter.collect::>>()?; let mut log_iter = log_iter.peekable(); while let Some(entry) = log_iter.next() { From 0df9f707987c8001c4ca81faf69033c679a75fd5 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 16 Jul 2023 18:03:06 +1200 Subject: [PATCH 06/10] `git log` example now accepts multiple paths. 
--- gix/examples/log.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/gix/examples/log.rs b/gix/examples/log.rs index 1df7eb80fe9..89146416762 100644 --- a/gix/examples/log.rs +++ b/gix/examples/log.rs @@ -52,7 +52,7 @@ struct Args { commitish: Option, #[clap(name = "path")] /// The path interested in log history of - path: Option, + paths: Vec, } fn run(args: &Args) -> anyhow::Result<()> { @@ -89,14 +89,12 @@ fn run(args: &Args) -> anyhow::Result<()> { .sorting(sorting) .all()? .filter(|info| info.as_ref() - // TODO the other implementation can take a sequence of - // paths - if so it should apply this check for all paths. .map_or(true, |info| { info.parent_ids.len() <= max_parents && info.parent_ids.len() >= min_parents && - args.path.as_ref().map_or(true, |path| { - // should args.path be provided, check that it is in - // fact relevant for this commit (it present?) + // if paths are provided check that any one of them are + // in fact relevant for the current commit. + args.paths.iter().map(|path| { // TODO should make use of the `git2::DiffOptions` // counterpart in gix for a set of files and also to // generate diffs. @@ -121,6 +119,7 @@ fn run(args: &Args) -> anyhow::Result<()> { Err(_) => false, } }) + .any(|r| r) }) ) .map(|info| { From bd59bbebddf804a4dd0872127dcc31b5c3b29c2f Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 16 Jul 2023 19:45:04 +1200 Subject: [PATCH 07/10] `git log` example include empty parents and paths - Also remember to test for empty list of paths and permit that through the filter as `any()` returns false on empty - likewise this applies to the check on list of parent_ids. 
--- gix/examples/log.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gix/examples/log.rs b/gix/examples/log.rs index 89146416762..2a92c099984 100644 --- a/gix/examples/log.rs +++ b/gix/examples/log.rs @@ -92,9 +92,10 @@ fn run(args: &Args) -> anyhow::Result<()> { .map_or(true, |info| { info.parent_ids.len() <= max_parents && info.parent_ids.len() >= min_parents && + // if the list of paths is empty the filter passes. // if paths are provided check that any one of them are // in fact relevant for the current commit. - args.paths.iter().map(|path| { + (args.paths.is_empty() || args.paths.iter().map(|path| { // TODO should make use of the `git2::DiffOptions` // counterpart in gix for a set of files and also to // generate diffs. @@ -107,7 +108,11 @@ fn run(args: &Args) -> anyhow::Result<()> { // commit's parents; if any pairs don't match, // this indicates this path was changed in this // commit thus should be included in output. - Ok(oid) => info.parent_ids + // naturally, root commits have no parents and + // by definition whatever paths in there must + // have been introduced there, so include them. 
+ Ok(oid) => info.parent_ids.is_empty() || info + .parent_ids .iter() .any(|id| { repo.rev_parse_single( @@ -119,7 +124,7 @@ fn run(args: &Args) -> anyhow::Result<()> { Err(_) => false, } }) - .any(|r| r) + .any(|r| r)) }) ) .map(|info| { From 6121b8f6a7da7f263c6e066155f053a1d7c81477 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 16 Jul 2023 23:20:15 +1200 Subject: [PATCH 08/10] cargo fmt --- gix/examples/log.rs | 140 +++++++++++++++++++--------------------- gix/examples/ls-tree.rs | 48 ++++---------- 2 files changed, 79 insertions(+), 109 deletions(-) diff --git a/gix/examples/log.rs b/gix/examples/log.rs index 2a92c099984..8a56d5ab8e0 100644 --- a/gix/examples/log.rs +++ b/gix/examples/log.rs @@ -1,10 +1,5 @@ use clap::Parser; -use gix::{ - date::time::format, - object::Kind, - objs::CommitRef, - traverse::commit::Sorting, -}; +use gix::{date::time::format, object::Kind, objs::CommitRef, traverse::commit::Sorting}; fn main() { let args = Args::parse_from(gix::env::args_os()); @@ -56,12 +51,10 @@ struct Args { } fn run(args: &Args) -> anyhow::Result<()> { - let repo = gix::discover( - args.git_dir.as_ref().map_or(".", |s| &s[..]) - )?; - let object = repo.rev_parse_single( - args.commitish.as_ref().map_or("HEAD", |s| &s[..]) - )?.object()?; + let repo = gix::discover(args.git_dir.as_ref().map_or(".", |s| &s[..]))?; + let object = repo + .rev_parse_single(args.commitish.as_ref().map_or("HEAD", |s| &s[..]))? + .object()?; let commit = match object.kind { Kind::Commit => object.try_into_commit()?, _ => anyhow::bail!("not a commit object"), @@ -70,8 +63,8 @@ fn run(args: &Args) -> anyhow::Result<()> { // TODO better way to deal with these flags. 
let sorting = if args.breadth_first { Sorting::BreadthFirst - } - else { // else if args.newest_first { + } else { + // else if args.newest_first { Sorting::ByCommitTimeNewestFirst }; @@ -84,67 +77,68 @@ fn run(args: &Args) -> anyhow::Result<()> { max_parents = 1; } - let mut log_iter: Box>> = Box::new(repo - .rev_walk([commit.id]) - .sorting(sorting) - .all()? - .filter(|info| info.as_ref() - .map_or(true, |info| { - info.parent_ids.len() <= max_parents && - info.parent_ids.len() >= min_parents && - // if the list of paths is empty the filter passes. - // if paths are provided check that any one of them are - // in fact relevant for the current commit. - (args.paths.is_empty() || args.paths.iter().map(|path| { - // TODO should make use of the `git2::DiffOptions` - // counterpart in gix for a set of files and also to - // generate diffs. - match repo.rev_parse_single( - format!("{}:{}", info.id, path).as_str() - ) { - // check by parsing the revspec on the path with - // the prefix of the tree of the current commit, - // vs. the same counterpart but using each of - // commit's parents; if any pairs don't match, - // this indicates this path was changed in this - // commit thus should be included in output. - // naturally, root commits have no parents and - // by definition whatever paths in there must - // have been introduced there, so include them. - Ok(oid) => info.parent_ids.is_empty() || info - .parent_ids - .iter() - .any(|id| { - repo.rev_parse_single( - format!("{id}:{path}").as_str() - ).ok() != Some(oid) - }), - // no oid for the path resolved with this commit - // so this commit can be omitted from output. - Err(_) => false, - } + let mut log_iter: Box>> = Box::new( + repo.rev_walk([commit.id]) + .sorting(sorting) + .all()? + .filter(|info| { + info.as_ref().map_or(true, |info| { + info.parent_ids.len() <= max_parents && + info.parent_ids.len() >= min_parents && + // if the list of paths is empty the filter passes. 
+ // if paths are provided check that any one of them are + // in fact relevant for the current commit. + (args.paths.is_empty() || args.paths.iter().map(|path| { + // TODO should make use of the `git2::DiffOptions` + // counterpart in gix for a set of files and also to + // generate diffs. + match repo.rev_parse_single( + format!("{}:{}", info.id, path).as_str() + ) { + // check by parsing the revspec on the path with + // the prefix of the tree of the current commit, + // vs. the same counterpart but using each of + // commit's parents; if any pairs don't match, + // this indicates this path was changed in this + // commit thus should be included in output. + // naturally, root commits have no parents and + // by definition whatever paths in there must + // have been introduced there, so include them. + Ok(oid) => info.parent_ids.is_empty() || info + .parent_ids + .iter() + .any(|id| { + repo.rev_parse_single( + format!("{id}:{path}").as_str() + ).ok() != Some(oid) + }), + // no oid for the path resolved with this commit + // so this commit can be omitted from output. + Err(_) => false, + } + }) + .any(|r| r)) }) - .any(|r| r)) - }) - ) - .map(|info| { - let info = info?; - let commit = info.object()?; - let commit_ref = CommitRef::from_bytes(&commit.data)?; - // type specifier using turbofish for the OK type here is - // because this isn't being collected... 
- Ok::<_, anyhow::Error>(LogEntryInfo { - commit_id: format!("{}", commit.id()), - parents: info.parent_ids.iter() - // probably could have a better way to display this - .map(|x| x.to_string()[..7].to_string()) - .collect(), - author: format!("{} <{}>", - commit_ref.author.name, commit_ref.author.email), - time: commit_ref.author.time.format(format::DEFAULT), - message: commit_ref.message.to_string(), }) - }) + .map(|info| { + let info = info?; + let commit = info.object()?; + let commit_ref = CommitRef::from_bytes(&commit.data)?; + // type specifier using turbofish for the OK type here is + // because this isn't being collected... + Ok::<_, anyhow::Error>(LogEntryInfo { + commit_id: format!("{}", commit.id()), + parents: info + .parent_ids + .iter() + // probably could have a better way to display this + .map(|x| x.to_string()[..7].to_string()) + .collect(), + author: format!("{} <{}>", commit_ref.author.name, commit_ref.author.email), + time: commit_ref.author.time.format(format::DEFAULT), + message: commit_ref.message.to_string(), + }) + }), ); if args.reverse { let mut results = log_iter.collect::>(); diff --git a/gix/examples/ls-tree.rs b/gix/examples/ls-tree.rs index 4dd00a73f26..f16f197d4dc 100644 --- a/gix/examples/ls-tree.rs +++ b/gix/examples/ls-tree.rs @@ -1,11 +1,5 @@ use clap::Parser; -use gix::{ - ObjectId, - object::Kind, - objs::tree::EntryMode, - objs::tree::EntryMode::Tree, - traverse::tree::Recorder, -}; +use gix::{object::Kind, objs::tree::EntryMode, objs::tree::EntryMode::Tree, traverse::tree::Recorder, ObjectId}; fn main() { let args = Args::parse_from(gix::env::args_os()); @@ -46,30 +40,19 @@ fn run(args: &Args) -> anyhow::Result<()> { // no common trait implementing common field assessors for that. 
let entries = if args.recursive { let mut recorder = Recorder::default(); - tree.traverse() - .breadthfirst(&mut recorder)?; - recorder.records.iter() - .filter(|entry| args.tree_recursing - || args.tree_only - || entry.mode != Tree - ) + tree.traverse().breadthfirst(&mut recorder)?; + recorder + .records + .iter() + .filter(|entry| args.tree_recursing || args.tree_only || entry.mode != Tree) .filter(|entry| !args.tree_only || (entry.mode == Tree)) - .map(|entry| Entry::new( - entry.mode, - entry.oid, - entry.filepath.to_string(), - )) + .map(|entry| Entry::new(entry.mode, entry.oid, entry.filepath.to_string())) .collect::>() - } - else { + } else { tree.iter() - .filter_map(std::result::Result::ok) // dropping errors silently + .filter_map(std::result::Result::ok) // dropping errors silently .filter(|entry| !args.tree_only || (entry.mode() == Tree)) - .map(|entry| Entry::new( - entry.inner.mode, - entry.id().detach(), - entry.inner.filename.to_string(), - )) + .map(|entry| Entry::new(entry.inner.mode, entry.id().detach(), entry.inner.filename.to_string())) .collect::>() }; @@ -81,10 +64,7 @@ fn run(args: &Args) -> anyhow::Result<()> { } // Helper struct and impl to facilitate displaying as per `git ls-tree`. -use std::fmt::{ - Display, - Formatter, -}; +use std::fmt::{Display, Formatter}; struct Entry { kind: EntryMode, @@ -94,11 +74,7 @@ struct Entry { impl Entry { fn new(kind: EntryMode, hash: ObjectId, path: String) -> Self { - Self { - kind, - hash, - path, - } + Self { kind, hash, path } } } From c05eb2204620a5ff5e04b766009c873a14ae0f9e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 17 Jul 2023 10:58:05 +0200 Subject: [PATCH 09/10] feat: top-level examples that represent fully-fledged command-line applications. Please note that these are just examples, which aren't necessarily production ready in terms of quality or performance. 
--- Cargo.lock | 1 - {gix/examples => examples}/log.rs | 0 {gix/examples => examples}/ls-tree.rs | 2 +- gix/Cargo.toml | 1 - 4 files changed, 1 insertion(+), 3 deletions(-) rename {gix/examples => examples}/log.rs (100%) rename {gix/examples => examples}/ls-tree.rs (98%) diff --git a/Cargo.lock b/Cargo.lock index f4bb4b9d378..b27a78c22bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1272,7 +1272,6 @@ version = "0.48.0" dependencies = [ "anyhow", "async-std", - "clap", "document-features", "gix-actor 0.23.0", "gix-attributes 0.14.1", diff --git a/gix/examples/log.rs b/examples/log.rs similarity index 100% rename from gix/examples/log.rs rename to examples/log.rs diff --git a/gix/examples/ls-tree.rs b/examples/ls-tree.rs similarity index 98% rename from gix/examples/ls-tree.rs rename to examples/ls-tree.rs index f16f197d4dc..c5ba6f9d0b5 100644 --- a/gix/examples/ls-tree.rs +++ b/examples/ls-tree.rs @@ -28,7 +28,7 @@ struct Args { } fn run(args: &Args) -> anyhow::Result<()> { - let repo = gix::discover(".")?; + let repo = gix::discover("..")?; let rev_spec = repo.rev_parse_single(&*args.treeish)?; let object = rev_spec.object()?; let tree = match object.kind { diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 8a8a32ab759..fa46c2cad19 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -183,7 +183,6 @@ is_ci = "1.1.1" anyhow = "1" walkdir = "2.3.2" serial_test = { version = "2.0.0", default-features = false } -clap = { version = "4.1.1", features = ["derive"] } async-std = { version = "1.12.0", features = ["attributes"] } [package.metadata.docs.rs] From 908f78424492b48b942cb772fa7ac41643a1034d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 17 Jul 2023 11:23:47 +0200 Subject: [PATCH 10/10] refactor --- examples/log.rs | 153 ++++++++++++++++++++++++-------------------- examples/ls-tree.rs | 75 ++++++++++------------ 2 files changed, 116 insertions(+), 112 deletions(-) diff --git a/examples/log.rs b/examples/log.rs index 8a56d5ab8e0..f340ca8549d 100644 --- 
a/examples/log.rs +++ b/examples/log.rs @@ -1,9 +1,13 @@ +/// A toy-version of `git log`. use clap::Parser; -use gix::{date::time::format, object::Kind, objs::CommitRef, traverse::commit::Sorting}; +use gix::bstr::{BString, ByteSlice}; +use gix::{date::time::format, traverse::commit::Sorting}; +use std::io::{stdout, Write}; +use std::path::{Path, PathBuf}; fn main() { let args = Args::parse_from(gix::env::args_os()); - match run(&args) { + match run(args) { Ok(()) => {} Err(e) => eprintln!("error: {e}"), } @@ -12,55 +16,59 @@ fn main() { #[derive(Debug, clap::Parser)] #[clap(name = "log", about = "git log example", version = option_env!("GITOXIDE_VERSION"))] struct Args { - #[clap(name = "dir", long = "git-dir")] /// Alternative git directory to use - git_dir: Option, - #[clap(short, long)] + #[clap(name = "dir", long = "git-dir")] + git_dir: Option, /// Number of commits to return - count: Option, #[clap(short, long)] + count: Option, /// Number of commits to skip - skip: Option, #[clap(short, long)] + skip: Option, /// Commits are sorted as they are mentioned in the commit graph. - breadth_first: bool, #[clap(short, long)] + breadth_first: bool, /// Commits are sorted by their commit time in descending order. + #[clap(short, long)] newest_first: bool, - #[clap(long)] /// Show commits with the specified minimum number of parents - min_parents: Option, #[clap(long)] + min_parents: Option, /// Show commits with the specified maximum number of parents - max_parents: Option, #[clap(long)] + max_parents: Option, /// Show only merge commits (implies --min-parents=2) - merges: bool, #[clap(long)] + merges: bool, /// Show only non-merge commits (implies --max-parents=1) + #[clap(long)] no_merges: bool, + /// Reverse the commit sort order (and loads all of them into memory). #[clap(short, long)] - /// Reverse the commit sort order reverse: bool, + /// The ref-spec for the first commit to log, or HEAD. 
#[clap(name = "commit")] - /// The starting commit - commitish: Option, - #[clap(name = "path")] + committish: Option, /// The path interested in log history of - paths: Vec, + #[clap(name = "path")] + paths: Vec, } -fn run(args: &Args) -> anyhow::Result<()> { - let repo = gix::discover(args.git_dir.as_ref().map_or(".", |s| &s[..]))?; - let object = repo - .rev_parse_single(args.commitish.as_ref().map_or("HEAD", |s| &s[..]))? - .object()?; - let commit = match object.kind { - Kind::Commit => object.try_into_commit()?, - _ => anyhow::bail!("not a commit object"), - }; +fn run(args: Args) -> anyhow::Result<()> { + let repo = gix::discover(args.git_dir.as_deref().unwrap_or(Path::new(".")))?; + let commit = repo + .rev_parse_single({ + args.committish + .map(|mut c| { + c.push_str("^{commit}"); + c + }) + .as_deref() + .unwrap_or("HEAD") + })? + .object()? + .try_into_commit()?; - // TODO better way to deal with these flags. let sorting = if args.breadth_first { Sorting::BreadthFirst } else { @@ -88,12 +96,17 @@ fn run(args: &Args) -> anyhow::Result<()> { // if the list of paths is empty the filter passes. // if paths are provided check that any one of them are // in fact relevant for the current commit. - (args.paths.is_empty() || args.paths.iter().map(|path| { - // TODO should make use of the `git2::DiffOptions` - // counterpart in gix for a set of files and also to - // generate diffs. + (args.paths.is_empty() || args.paths.iter().any(|path| { + // TODO: should make use of the `git2::DiffOptions` + // counterpart in gix for a set of files and also to + // generate diffs. When ready, also make paths resistant + // to illformed UTF8 by not using ".display()". + // PERFORMANCE WARNING: What follows is a clever implementation + // that is also **very** slow - do not use on bigger sample + // repositories as this needs native support in `gix` to + // be fast enough. 
match repo.rev_parse_single( - format!("{}:{}", info.id, path).as_str() + format!("{}:{}", info.id, path.display()).as_str() ) { // check by parsing the revspec on the path with // the prefix of the tree of the current commit, @@ -109,76 +122,74 @@ fn run(args: &Args) -> anyhow::Result<()> { .iter() .any(|id| { repo.rev_parse_single( - format!("{id}:{path}").as_str() + format!("{id}:{}", path.display()).as_str() ).ok() != Some(oid) }), // no oid for the path resolved with this commit // so this commit can be omitted from output. Err(_) => false, } - }) - .any(|r| r)) + })) }) }) - .map(|info| { + .map(|info| -> anyhow::Result<_> { let info = info?; let commit = info.object()?; - let commit_ref = CommitRef::from_bytes(&commit.data)?; - // type specifier using turbofish for the OK type here is - // because this isn't being collected... - Ok::<_, anyhow::Error>(LogEntryInfo { - commit_id: format!("{}", commit.id()), - parents: info - .parent_ids - .iter() - // probably could have a better way to display this - .map(|x| x.to_string()[..7].to_string()) - .collect(), - author: format!("{} <{}>", commit_ref.author.name, commit_ref.author.email), + let commit_ref = commit.decode()?; + Ok(LogEntryInfo { + commit_id: commit.id().to_hex().to_string(), + parents: info.parent_ids().map(|id| id.shorten_or_id().to_string()).collect(), + author: { + let mut buf = Vec::new(); + commit_ref.author.write_to(&mut buf)?; + buf.into() + }, time: commit_ref.author.time.format(format::DEFAULT), - message: commit_ref.message.to_string(), + message: commit_ref.message.to_owned(), }) }), ); if args.reverse { - let mut results = log_iter.collect::>(); + let mut results: Vec<_> = log_iter.collect(); results.reverse(); log_iter = Box::new(results.into_iter()) } - if let Some(n) = args.skip { - log_iter = Box::new(log_iter.skip(n)); - } - if let Some(n) = args.count { - log_iter = Box::new(log_iter.take(n)); - } - // ... 
if the goal is to have the results as a `Vec`: - // let results = log_iter.collect::>>()?; - let mut log_iter = log_iter.peekable(); + let mut log_iter = log_iter + .skip(args.skip.unwrap_or_default()) + .take(args.count.unwrap_or(usize::MAX)) + .peekable(); + + let mut out = stdout().lock(); + let mut buf = Vec::new(); while let Some(entry) = log_iter.next() { + buf.clear(); let entry = entry?; - println!("commit {}", entry.commit_id); + writeln!(buf, "commit {}", entry.commit_id)?; if entry.parents.len() > 1 { - println!("Merge: {}", entry.parents.join(" ")); + writeln!(buf, "Merge: {}", entry.parents.join(" "))?; } - println!("Author: {}", entry.author); - println!("Date: {}\n", entry.time); + writeln!(buf, "Author: {}", entry.author)?; + writeln!(buf, "Date: {}\n", entry.time)?; for line in entry.message.lines() { - println!(" {line}"); + write!(buf, " ")?; + buf.write_all(line)?; + writeln!(buf)?; } // only include newline if more log entries, mimicking `git log` if log_iter.peek().is_some() { - println!(); + writeln!(buf)?; } + out.write_all(&buf)?; } Ok(()) } -pub struct LogEntryInfo { - pub commit_id: String, - pub parents: Vec, - pub author: String, - pub time: String, - pub message: String, +struct LogEntryInfo { + commit_id: String, + parents: Vec, + author: BString, + time: String, + message: BString, } diff --git a/examples/ls-tree.rs b/examples/ls-tree.rs index c5ba6f9d0b5..449f432f6aa 100644 --- a/examples/ls-tree.rs +++ b/examples/ls-tree.rs @@ -1,9 +1,12 @@ use clap::Parser; -use gix::{object::Kind, objs::tree::EntryMode, objs::tree::EntryMode::Tree, traverse::tree::Recorder, ObjectId}; +use gix::bstr::BString; +use std::io::{stdout, Write}; + +use gix::{objs::tree::EntryMode, objs::tree::EntryMode::Tree, traverse::tree::Recorder, ObjectId}; fn main() { let args = Args::parse_from(gix::env::args_os()); - match run(&args) { + match run(args) { Ok(()) => {} Err(e) => eprintln!("error: {e}"), } @@ -13,80 +16,70 @@ fn main() { #[clap(name = "ls-tree", 
about = "git ls-tree example", version = option_env!("GITOXIDE_VERSION"))] #[clap(arg_required_else_help = true)] struct Args { - #[clap(short = 'r')] /// Recurse into subtrees + #[clap(short = 'r')] recursive: bool, - #[clap(short = 'd')] /// Only show trees + #[clap(short = 'd')] tree_only: bool, - #[clap(short = 't')] /// Show trees when recursing + #[clap(short = 't')] tree_recursing: bool, - #[clap(name = "tree-ish")] /// A revspec pointing to a tree-ish object, e.g. 'HEAD', 'HEAD:src/' + #[clap(name = "tree-ish")] treeish: String, } -fn run(args: &Args) -> anyhow::Result<()> { - let repo = gix::discover("..")?; - let rev_spec = repo.rev_parse_single(&*args.treeish)?; - let object = rev_spec.object()?; - let tree = match object.kind { - Kind::Commit => object.try_into_commit()?.tree()?, - Kind::Tree => object.try_into_tree()?, - _ => anyhow::bail!("not a tree-ish object"), - }; - // Would like to take the entry arguments directly, but now there is - // no common trait implementing common field assessors for that. +fn run(mut args: Args) -> anyhow::Result<()> { + let repo = gix::discover(".")?; + let tree = repo + .rev_parse_single({ + args.treeish.push_str("^{tree}"); + &*args.treeish + })? + .object()? 
+ .into_tree(); let entries = if args.recursive { let mut recorder = Recorder::default(); tree.traverse().breadthfirst(&mut recorder)?; recorder .records - .iter() + .into_iter() .filter(|entry| args.tree_recursing || args.tree_only || entry.mode != Tree) .filter(|entry| !args.tree_only || (entry.mode == Tree)) - .map(|entry| Entry::new(entry.mode, entry.oid, entry.filepath.to_string())) + .map(|entry| Entry::new(entry.mode, entry.oid, entry.filepath)) .collect::>() } else { tree.iter() - .filter_map(std::result::Result::ok) // dropping errors silently - .filter(|entry| !args.tree_only || (entry.mode() == Tree)) - .map(|entry| Entry::new(entry.inner.mode, entry.id().detach(), entry.inner.filename.to_string())) + .filter_map(|res| res.ok().map(|entry| entry.inner)) // dropping errors silently + .filter(|entry| !args.tree_only || (entry.mode == Tree)) + .map(|entry| Entry::new(entry.mode, entry.oid.to_owned(), entry.filename.to_owned())) .collect::>() }; + let mut out = stdout().lock(); for entry in entries { - println!("{entry}"); + writeln!( + out, + "{:06o} {:4} {} {}", + entry.kind as u16, + entry.kind.as_str(), + entry.hash, + entry.path + )?; } Ok(()) } -// Helper struct and impl to facilitate displaying as per `git ls-tree`. -use std::fmt::{Display, Formatter}; - struct Entry { kind: EntryMode, hash: ObjectId, - path: String, + path: BString, } impl Entry { - fn new(kind: EntryMode, hash: ObjectId, path: String) -> Self { + fn new(kind: EntryMode, hash: ObjectId, path: BString) -> Self { Self { kind, hash, path } } } - -impl Display for Entry { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{:06o} {:4} {} {}", - self.kind as u16, - self.kind.as_str().to_string(), - self.hash, - self.path, - ) - } -}