def process_commits_csv(file_path):
    """Load commit records from a CSV dump produced by ``git log``.

    Two row layouts are accepted:

    * 10 columns -- commit hash, author name/email/date, committer
      name/email/date, tree hash, parent hashes and full commit message.
    * 11 columns -- the same fields followed by the commit notes, which is
      what a ``git log`` format ending in ``"%B","%N"`` produces.

    Rows of any other width are considered malformed and are skipped.

    Args:
        file_path: Path to the UTF-8 encoded CSV file.

    Returns:
        A list of dicts, one per well-formed row, keyed by column name.
        The ``"notes"`` key is present only for 11-column rows.

    Raises:
        OSError: If the file cannot be opened.
    """
    base_headers = (
        "commit_hash", "author_name", "author_email", "author_date",
        "committer_name", "committer_email", "committer_date",
        "tree_hash", "parent_hashes", "full_commit_message",
    )
    # Dumps that also export git notes carry one extra trailing column;
    # without this layout every row of such a dump would be dropped.
    extended_headers = base_headers + ("notes",)
    headers_by_width = {
        len(base_headers): base_headers,
        len(extended_headers): extended_headers,
    }

    commits = []
    # newline='' lets the csv module do its own line-ending handling, which
    # it needs for quoted fields that span several lines (commit messages).
    with open(file_path, mode='r', encoding='utf-8', newline='') as file:
        for row in csv.reader(file):
            headers = headers_by_width.get(len(row))
            if headers is None:
                # Unknown width: malformed row, skip it.
                continue
            commits.append(dict(zip(headers, row)))

    return commits
#!/bin/bash
#
# dump_commit.sh - export the most recent <n> commits of the current
# repository to commits.csv in the working directory.
#
# Usage: dump_commit.sh <n>
#
# Each commit becomes one record of eleven double-quoted, comma-separated
# fields: commit hash, author name/email/date, committer name/email/date,
# tree hash, parent hashes, raw commit message and commit notes.
# NOTE(review): field values are not CSV-escaped, so a double quote inside a
# commit message or a name will yield a malformed record -- confirm whether
# downstream consumers need stricter quoting.

# Require a non-negative integer so a missing or mistyped argument is
# reported here instead of surfacing as an opaque git error.
if [[ ! "${1:-}" =~ ^[0-9]+$ ]]; then
  echo "Usage: $0 <n>   (n = number of commits to dump)" >&2
  exit 1
fi

# Number of commits to export.
n=$1

# Only announce success when git actually produced the dump.
if ! git log --pretty=format:'"%H","%an","%ae","%ad","%cn","%ce","%cd","%T","%P","%B","%N"' -n "$n" --date=iso > commits.csv; then
  echo "Error: git log failed; commits.csv may be empty or incomplete" >&2
  exit 1
fi

echo "Dumped $n commits to commits.csv"