Skip to content

Commit ad935df

Browse files
authored
Merge pull request TheAlgorithms#179 from Shivams334/master
Added new code
2 parents 5c10a29 + fe7b86c commit ad935df

File tree

2 files changed

+202
-0
lines changed

2 files changed

+202
-0
lines changed

sorts/countingsort.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Python program for counting sort
2+
3+
# This is the main function that sorts the characters of the given
4+
# string arr into alphabetical order
5+
def countSort(arr):
    """Sort the characters of ``arr`` using counting sort.

    Args:
        arr: A string (or other sequence of single characters) whose
            characters all have code points below 256.

    Returns:
        A new list containing the characters of ``arr`` in ascending
        code-point order. ``arr`` itself is left untouched.

    Raises:
        IndexError: If a character has a code point of 256 or more.
    """
    # One counter per possible character value.
    count = [0] * 256

    # Store the number of occurrences of each character.
    for ch in arr:
        count[ord(ch)] += 1

    # Turn the counts into running totals so that count[c] is the number
    # of characters <= c.  The loop starts at 1: starting at 0 would add
    # count[-1] (the counter for character 255) to every total.
    for code in range(1, 256):
        count[code] += count[code - 1]

    # The output needs one slot per input character, not one per
    # possible character value, so inputs longer than 256 work too.
    output = [""] * len(arr)

    # Walk the input backwards so equal characters keep their relative
    # order (stable sort).
    for ch in reversed(arr):
        code = ord(ch)
        count[code] -= 1
        output[count[code]] = ch

    return output
37+
38+
# Driver program to test above function
39+
arr = "thisisthestring"
ans = countSort(arr)
# countSort hands back a list of characters, so join them for display.
print("Sorted string array is %s" % "".join(ans))

sorts/external-sort.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env python
2+
3+
#
4+
# Sort large text files in a minimum amount of memory
5+
#
6+
import os
7+
import sys
8+
import argparse
9+
10+
class FileSplitter(object):
    """Split a text file into individually sorted block files.

    Block files are named after ``BLOCK_FILENAME_FORMAT`` and are created
    relative to the current working directory.
    """

    BLOCK_FILENAME_FORMAT = 'block_{0}.dat'

    def __init__(self, filename):
        """Remember the file to split; no I/O happens here."""
        self.filename = filename
        # Names of the block files written so far, in creation order.
        self.block_filenames = []

    def write_block(self, data, block_number):
        """Write ``data`` to the block file numbered ``block_number``.

        The block's filename is recorded only after the write succeeded.
        """
        filename = self.BLOCK_FILENAME_FORMAT.format(block_number)
        # The context manager closes the file even if the write fails.
        with open(filename, 'w') as block_file:
            block_file.write(data)
        self.block_filenames.append(filename)

    def get_block_filenames(self):
        """Return the list of block filenames written so far."""
        return self.block_filenames

    def split(self, block_size, sort_key=None):
        """Read the source file in chunks, sort each chunk and write it out.

        Args:
            block_size: Size hint passed to ``readlines`` for each chunk.
            sort_key: Optional key function applied to each line when
                sorting a chunk; ``None`` sorts the lines directly.
        """
        block_number = 0

        with open(self.filename, 'r') as source:
            while True:
                lines = source.readlines(block_size)
                if not lines:
                    break

                # Only the last line of the file can lack a newline.  If
                # it were sorted into the middle of the block unchanged it
                # would be glued to the following line by the join below.
                if not lines[-1].endswith('\n'):
                    lines[-1] += '\n'

                lines.sort(key=sort_key)
                self.write_block(''.join(lines), block_number)
                block_number += 1

    def cleanup(self):
        """Delete every block file recorded in ``block_filenames``."""
        # An explicit loop is required: map() is lazy on Python 3, so a
        # discarded map(os.remove, ...) would delete nothing.
        for filename in self.block_filenames:
            os.remove(filename)
47+
48+
49+
class NWayMerge(object):
    """Merge strategy that picks the smallest of the candidate lines."""

    def select(self, choices):
        """Return the index of the smallest value in ``choices``.

        Args:
            choices: Either a mapping of buffer index to line (the keys
                need not be contiguous) or a plain sequence of lines.

        Returns:
            The key/index whose value is smallest, or ``-1`` when
            ``choices`` is empty.  On ties the lowest index wins.
        """
        if hasattr(choices, 'keys'):
            # Visit mapping keys in ascending order so ties are resolved
            # deterministically regardless of dict ordering.
            candidates = ((index, choices[index]) for index in sorted(choices))
        else:
            candidates = enumerate(choices)

        min_index = -1
        min_str = None

        for index, value in candidates:
            if min_str is None or value < min_str:
                min_index = index
                # Track the best value seen so far; without this update
                # every candidate would replace the current minimum.
                min_str = value

        return min_index
59+
60+
61+
class FilesArray(object):
    """Keep one pending line per open file, indexed 0..num_buffers-1."""

    def __init__(self, files):
        """Store ``files`` (indexable by 0..len-1) and start with no lines buffered."""
        self.files = files
        # Indices of files whose readline() has returned ''.
        self.empty = set()
        self.num_buffers = len(files)
        # None marks a slot that must be refilled on the next refresh().
        self.buffers = dict.fromkeys(range(self.num_buffers))

    def get_dict(self):
        """Return {index: buffered line} for every file not yet exhausted."""
        pending = {}
        for slot in range(self.num_buffers):
            if slot in self.empty:
                continue
            pending[slot] = self.buffers[slot]
        return pending

    def refresh(self):
        """Refill consumed slots; return False once every file is exhausted."""
        for slot in range(self.num_buffers):
            if slot in self.empty or self.buffers[slot] is not None:
                continue

            line = self.files[slot].readline()
            self.buffers[slot] = line

            # readline() returning '' signals end of file.
            if line == '':
                self.empty.add(slot)

        return len(self.empty) != self.num_buffers

    def unshift(self, index):
        """Take the buffered line at ``index`` and mark the slot for refill."""
        taken, self.buffers[index] = self.buffers[index], None
        return taken
89+
90+
91+
class FileMerger(object):
    """Merge several sorted block files into a single output file."""

    def __init__(self, merge_strategy):
        """Store the strategy whose ``select`` picks the next line to emit."""
        self.merge_strategy = merge_strategy

    def merge(self, filenames, outfilename, buffer_size):
        """Merge the files in ``filenames`` into ``outfilename``.

        Args:
            filenames: Sequence of input file names.
            outfilename: Name of the file to write the merged lines to.
            buffer_size: Passed as the buffering argument to every
                ``open`` call made here.
        """
        files = {}
        outfile = open(outfilename, 'w', buffer_size)

        try:
            files = self.get_file_handles(filenames, buffer_size)
            buffers = FilesArray(files)

            while buffers.refresh():
                min_index = self.merge_strategy.select(buffers.get_dict())
                outfile.write(buffers.unshift(min_index))
        finally:
            # Closing flushes the merged output and releases the block
            # files so they can be deleted afterwards.
            outfile.close()
            for handle in files.values():
                handle.close()

    def get_file_handles(self, filenames, buffer_size):
        """Open every file for reading and return them as {index: file}.

        If one of the files cannot be opened, the handles opened so far
        are closed before the error propagates.
        """
        files = {}

        try:
            for index, name in enumerate(filenames):
                files[index] = open(name, 'r', buffer_size)
        except Exception:
            for handle in files.values():
                handle.close()
            raise

        return files
110+
111+
112+
113+
class ExternalSort(object):
    """Sort a text file by splitting it into sorted blocks and merging them."""

    def __init__(self, block_size):
        """Store ``block_size``, the size used for splitting and buffering."""
        self.block_size = block_size

    def sort(self, filename, sort_key=None):
        """Sort ``filename`` and write the result to ``filename + '.out'``.

        Args:
            filename: Name of the text file to sort.
            sort_key: Optional key function forwarded to the splitter.
        """
        num_blocks = self.get_number_blocks(filename, self.block_size)
        splitter = FileSplitter(filename)

        try:
            splitter.split(self.block_size, sort_key)

            merger = FileMerger(NWayMerge())
            # Floor division: open() requires an integer buffering value,
            # and true division yields a float on Python 3.
            buffer_size = self.block_size // (num_blocks + 1)
            # 0 is not allowed for text files on Python 3 and 1 selects
            # line buffering, so use the interpreter default instead.
            if buffer_size < 2:
                buffer_size = -1
            merger.merge(splitter.get_block_filenames(), filename + '.out', buffer_size)
        finally:
            # Remove the temporary block files even when sorting fails.
            splitter.cleanup()

    def get_number_blocks(self, filename, block_size):
        """Return the whole number of blocks computed from the file size.

        The result is ``size // block_size + 1``.
        """
        return (os.stat(filename).st_size // block_size) + 1
130+
131+
132+
def parse_memory(string):
    """Convert a size such as ``'512'``, ``'64k'``, ``'100M'`` or ``'2G'`` to bytes.

    A trailing ``k``, ``m`` or ``g`` (any case) multiplies the leading
    integer by 1024, 1024**2 or 1024**3; any other input is parsed as a
    plain integer.
    """
    multipliers = {
        'k': 1024,
        'm': 1024 * 1024,
        'g': 1024 * 1024 * 1024,
    }

    unit = string[-1].lower()
    if unit in multipliers:
        return int(string[:-1]) * multipliers[unit]

    return int(string)
141+
142+
143+
144+
def main():
    """Parse the command line and sort the named file."""
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-m', '--mem',
        default='100M',
        help='amount of memory to use for sorting',
    )
    cli.add_argument(
        'filename',
        nargs=1,
        metavar='<filename>',
        help='name of file to sort',
    )
    options = cli.parse_args()

    memory_limit = parse_memory(options.mem)
    external_sorter = ExternalSort(memory_limit)
    # nargs=1 makes argparse store the filename as a one-element list.
    target = options.filename[0]
    external_sorter.sort(target)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy