diff --git a/.gitignore b/.gitignore
index 08b497d..be27472 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
 linux
-bpf
+mails/bpf
 linux/commits.csv
diff --git a/Makefile b/Makefile
index ff5c0de..cca5749 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,6 @@
 linux:
 	git clone https://github.com/torvalds/linux
 
-bpf:
-	git clone --mirror http://lore.kernel.org/bpf/0 bpf/git/0.git
-	cd bpf/git/0.git && ../../../dump.sh
+mails/bpf: # mirror of the lore.kernel.org bpf archive; cloning into $@ avoids a failing `cd` when mails/ is absent
+	git clone --mirror http://lore.kernel.org/bpf/0 $@/git/0.git
 
-linux/commits.csv:
-	cd linux && git log --pretty=format:'"%H","%an","%ae","%ad","%cn","%ce","%cd","%T","%P","%B"' --date=iso > commits.csv
\ No newline at end of file
diff --git a/README.md b/README.md
index 2ce41a2..2c30102 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,7 @@
-# test-git
+# bpf subsystem
 
-```
-git clone --mirror http://lore.kernel.org/bpf/0 bpf/git/0.git
+- From 13 Feb 2019 until 10 Sep 2024: 126469 mails
 
-cd bpf/git/0.git
-../../../dump.sh
-```
 
 Feature versions:
 
diff --git a/analysis/parse_commit.py b/analysis/parse_commit.py
new file mode 100644
index 0000000..d4f25b6
--- /dev/null
+++ b/analysis/parse_commit.py
@@ -0,0 +1,42 @@
+import csv
+
+# Column order written by scripts/dump_commit.sh. The trailing "notes" column
+# (git's %N) is optional so 10-column dumps made without it still parse.
+HEADERS = ["commit_hash", "author_name", "author_email", "author_date",
+           "committer_name", "committer_email", "committer_date",
+           "tree_hash", "parent_hashes", "full_commit_message", "notes"]
+
+
+def process_commits_csv(file_path):
+    """Read a commit dump and return one dict per well-formed row.
+
+    Args:
+        file_path: path to a CSV file with 10 or 11 columns per record, in
+            the order given by HEADERS.
+
+    Returns:
+        A list of dicts keyed by the HEADERS names. Rows with 10 fields
+        produce dicts without a "notes" key; rows with any other field
+        count than 10 or 11 are skipped.
+    """
+    accepted_widths = (len(HEADERS) - 1, len(HEADERS))
+
+    # newline='' lets the csv module handle line breaks embedded in quoted
+    # fields (commit messages span several lines) instead of the text layer.
+    with open(file_path, mode='r', encoding='utf-8', newline='') as file:
+        return [dict(zip(HEADERS, row))
+                for row in csv.reader(file)
+                if len(row) in accepted_widths]
+
+
+if __name__ == "__main__":
+    # Path to your CSV file
+    file_path = 'commits.csv'
+
+    # Process the CSV file
+    commits = process_commits_csv(file_path)
+
+    # Print out a summary of the commits
+    # print_commit_summary(commits)
+
+    # You can further process `commits` list as needed (e.g., filter by author, date, etc.)
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 0000000..5824f74
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1 @@
+# The data will be generated here.
\ No newline at end of file
diff --git a/scripts/count_mails.sh b/scripts/count_mails.sh
new file mode 100644
index 0000000..5d4d3eb
--- /dev/null
+++ b/scripts/count_mails.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Count the mail blobs in the git repository of the current directory.
+# NOTE(review): presumably run inside the lore mirror (mails/bpf/git/0.git) — confirm.
+# The first mail is Wed, 13 Feb 2019 16:56:04 -0500
+# Match the object path exactly ("m") rather than any path that ends in "m".
+git rev-list --all --objects | awk '$2 == "m"' | wc -l
diff --git a/scripts/dump_commit.sh b/scripts/dump_commit.sh
new file mode 100755
index 0000000..ceac841
--- /dev/null
+++ b/scripts/dump_commit.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Dump the first <number_of_commits> commits listed by `git log` in the
+# current repository to commits.csv, one quoted record per commit.
+# Fields: hash, author name/email/date, committer name/email/date, tree hash,
+# parent hashes, full message (%B), notes (%N).
+# NOTE: values are written verbatim, so a double quote inside a message or
+# name is not escaped in the output.
+
+# Stop instead of reporting success when git log fails.
+set -euo pipefail
+
+# Require a non-negative integer commit count as the first argument.
+if [[ ! "${1:-}" =~ ^[0-9]+$ ]]; then
+  echo "Usage: $0 <number_of_commits>" >&2
+  exit 1
+fi
+
+# Store the argument in a variable
+n=$1
+
+# Run git log with the specified number of commits and format the output into a CSV file
+git log --pretty=format:'"%H","%an","%ae","%ad","%cn","%ce","%cd","%T","%P","%B","%N"' -n "$n" --date=iso > commits.csv
+
+echo "Dumped $n commits to commits.csv"
diff --git a/dump.sh b/scripts/dump_email.sh
similarity index 100%
rename from dump.sh
rename to scripts/dump_email.sh
diff --git a/scripts/mention_bpf_commit.sh b/scripts/mention_bpf_commit.sh
new file mode 100644
index 0000000..371123a
--- /dev/null
+++ b/scripts/mention_bpf_commit.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Count the commits reachable from HEAD whose message matches "bpf".
+# NOTE(review): --grep is case-sensitive, so messages that only say "BPF" are not counted — confirm intent.
+git log --grep=bpf --oneline | wc -l