-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdata-compression.cpp
145 lines (117 loc) · 4.03 KB
/
data-compression.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/**
* @file data-compression.cpp
* @author prakash ([email protected])
* @brief
Consider the following data compression technique. We have a table of m
text strings, each at most k in length. We want to encode a data string D of
length n using as few text strings as possible. For example, if our table
contains (a,ba,abab,b) and the data string is bababbaababa, the best way to
encode it is (b,abab,ba,abab,a)—a total of five code words. Give an O(nmk)
algorithm to find the length of the best encoding. You may assume that every
string has at least one encoding in terms of the table.
* @version 0.1
* @date 2021-09-01
*
* @copyright Copyright (c) 2021
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void printTable(char **table, int k) {
printf("Table: {%s", table[0]);
for (int i = 1; i < k; i++) {
printf(", %s", table[i]);
}
printf("}\n");
}
void printSolution(char *string, int *branchLengths, int n) {
if (!n) {
return;
}
printSolution(string, branchLengths, branchLengths[n]);
printf("%.*s ", n - branchLengths[n], string + branchLengths[n]);
}
int *solve(char *string, char **table, int n, int m, int k, int *comparisons) {
int *currentBranchEnds =
(int *)calloc(sizeof(int), n); // keeps track of all of the current ends
int currentBranchCount =
1; // keeps track of how many active branch ends there are
int *newBranchEnds = (int *)calloc(
sizeof(int), n); // keeps track of the new branches on each iteration
int newBranchCount = 0;
int *branchLengths = (int *)calloc(
sizeof(int), (n + 1)); // used to reconstruct the solution in the end
// used for O(1) length lookups
int *tableStringLengths = (int *)malloc(sizeof(int) * k);
for (int i = 0; i < k; i++) {
tableStringLengths[i] = strlen(table[i]);
}
*comparisons = 0;
// continue searching while the entire string hasn't been encoded
while (!branchLengths[n]) {
// for every active branch
for (int i = 0; i < currentBranchCount; i++) {
// try all table strings
for (int j = 0; j < k; j++) {
int newBranchEnd = currentBranchEnds[i] + tableStringLengths[j];
// if the new length (branch end) already exists OR
// if the new branch would be too long for the string, discard the
// branch
*comparisons += 1;
if (newBranchEnd > n || branchLengths[newBranchEnd]) {
continue;
}
// check to see if the table string matches the target string at this
// position could be done with strcmp, but is done in a loop here to be
// explicit about complexity
char match = 1;
for (int l = 0; table[j][l]; l++) {
*comparisons += 1;
if (string[currentBranchEnds[i] + l] != table[j][l]) {
match = 0;
break;
}
}
// if it matches, we can create a new branch at this position
*comparisons += 1;
if (match) {
branchLengths[newBranchEnd] = currentBranchEnds[i];
newBranchEnds[newBranchCount] = newBranchEnd;
newBranchCount += 1;
}
}
}
// swap the branch ends arrays to save copying
int *tmp = currentBranchEnds;
currentBranchEnds = newBranchEnds;
newBranchEnds = tmp;
currentBranchCount = newBranchCount;
newBranchCount = 0;
}
free(currentBranchEnds);
free(newBranchEnds);
free(tableStringLengths);
return branchLengths;
}
int main() {
int k = 4;
int m = 4;
char **table = (char **)malloc(sizeof(char *) * k);
table[0] = (char *)"a";
table[1] = (char *)"ba";
table[2] = (char *)"abab";
table[3] = (char *)"b";
printTable(table, k);
char *string = (char *)"bababbaababa";
int n = strlen(string);
printf("String: %s\n", string);
int comparisons;
int *solution = solve(string, table, n, m, k, &comparisons);
printf("Solution: ");
printSolution(string, solution, n);
printf("\n");
printf("Comparisons: %d\n", comparisons);
free(table);
free(solution);
}