Demo entry 2624951

words

   

Submitted by anonymous on Sep 04, 2015 at 07:03
Language: Python 3. Code size: 731 Bytes.

import re, sys
from collections import Counter
from urllib.request import urlopen

rstrip = lambda s: s.rstrip()   # strips the trailing newline/whitespace of a line

stop_words_url = 'http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words'

# Download the stop-word list (one word per line).
# urlopen() yields bytes in Python 3, so the payload is decoded to str here;
# otherwise no stop word would ever compare equal to the str tokens produced
# by re.findall() on text files.  The `with` block closes the HTTP response.
with urlopen(stop_words_url) as response:
	stop_words = list(map(rstrip, response.read().decode('utf-8').splitlines()))

# Return list of (lower-cased) words in a file.
def file_words(file_name):
	"""Return the list of lower-cased words found in the file *file_name*.

	A word is a maximal run of ``\\w`` characters (letters, digits and
	underscore).  The file is opened in text mode with the platform's
	default encoding and read in one go.
	"""
	with open(file_name) as f:
		# Raw string: '\w' in a plain literal is an invalid escape sequence.
		return re.findall(r'\w+', f.read().lower())

# Count every word of every file named on the command line.  Feeding the
# Counter from a generator avoids the quadratic list concatenation of
# sum(..., []) and never builds one big list of all tokens.
word_counts = Counter(
	word for file_name in sys.argv[1:] for word in file_words(file_name)
)

# Discard the stop words; pop() with a default ignores words never counted.
for word in stop_words:
	word_counts.pop(word, None)

# Print the 50 most frequent words as "<count>  <word>", one per line,
# skipping any entry that is a stop word.
for word, count in word_counts.most_common(50):
	if word in stop_words:
		continue
	print("%3d  %s" % (count, word))

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).