[Tutor] Writing to a file

Fri Jan 19 12:58:10 EST 2018

=

On Jan 18, 2018 5:45 PM, "Devansh Rastogi" <devanshr at gmail.com> wrote:
>
> Hello,
>
> I'm new to python and programming as
>
> from collections import Counter
> import json
>
I don't see any value in having a class. All you need are functions and
global variables.

> class Files:
>     def __init__(self, filename):

I don't see any need for a function or "with". Just write file_input_string
= open(filename, 'r', encoding='utf-16').read().replace('\n', ' ')

>         with open(filename, 'r', encoding='utf-16') as file_input:
>             self.file_input_string = file_input.read().replace('\n', ' ')
>
You are assuming that all words are separated by blanks which is rarely the
case in natural language.

>     def num_of_words(self):
>         """ Return number of words in the file"""
>         return str(len(self.file_input_string.split()))
>
Several comments on Counting:

Your program is creating lists of ones. Rather than counting them, all you
need to do is take the length of each list, e.g.: lowercase_letters =
len([c for c in self.file_input_string if c.islower()])

However, there is a much better way to do the counting: translate the text
using the string translate method into various characters that identify the
class of each letter in the file. Then count the occurrences of each of
those characters. Example (counting Upper, Lower, Number, and punctuation
Single and Double stroke):

txt=  "THIS is 123 ,./ :*(" # input file text

# maps input characters to corresponding class characters
transtable = str.maketrans(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ,./:*(",
    "L"*26 + "U"*26 + "N"*10 + "S"*4 + "D"*3)

# creates string of class characters
xlation = txt.translate(transtable) # 'UUUUSLLSNNNSSSSSDDD'

counts = dict(Counter(xlation)) # {'S': 7, 'U': 4, 'N': 3, 'L': 2, 'D': 3}

>     def num_of_keystrokes(self):
>         """ Total number of keystrokes
>         # abcde.. = 1 stroke
>         # ABCDE.. = 2 strokes
>         # '.,-/;[]=\ = 1 stroke
>         # !@#$%^&*()_+|}{":?>< = 2 strokes """
>
>         lowercase_letters = sum(1 for c in self.file_input_string if
> c.islower())
>         uppercase_letters = sum(2 for c in self.file_input_string if
> c.isupper())
>         one_keystroke_punc = ".,-=[]\;'/ "  # space included
>         puncuation_one = sum(1 for c in self.file_input_string if c in
> one_keystroke_punc)
>         two_keystroke_punc = '!@#$%^&*()_+|}{":?><'
>         puncuation_two = sum(2 for c in self.file_input_string if c in
> two_keystroke_punc)
>
>         return str(lowercase_letters + uppercase_letters +
> puncuation_one + puncuation_two)
>
>     def num_of_char(self):
>         """ Return number of characters in the string without spaces"""
>         return str(len(self.file_input_string) -
> self.file_input_string.count(" "))
>
>     def frequency_of_char(self):
>         """ Frequency of characters in the file """
>         count = Counter(self.file_input_string)

There is no need to apply dict to count. Counters have an items method.

>         dict_count = dict(count)
>         print("{:<12} {:<10}".format('Character', 'Frequency'))
>         for k, v in dict_count.items():
>             print("{:<12} {:<10}".format(k, v))
>
>     def frequency_of_words(self):
>         """ Frequency of words in the file"""
>         # word_count = Counter()
>         # for word in self.file_input_string.replace(' ', '\n'): ###
> macht wider char. sollte fuer line funktioniern
>         #     word_count.update(word)
>         # print("{:<15} {:15}".format("Word", "Frequency"))
>         # for k, v in word_count.items():
>         #     print("{:<15} {:<15}".format(k, v))
>
>         word_list = self.file_input_string.split()
>         word_frequecy = [word_list.count(w) for w in word_list]  ##
> funktioniert mit string.count!!
>         word_frequecy_dict = dict(zip(word_list, word_frequecy))
>         print("{:<15} {:15}".format("Word", "Frequency"))
>         for k, v in word_frequecy_dict.items():
>             print("{:<15} {:<15}".format(k, v))
>
>     def average_len_of_words(self):
>         """ calculate the averge length of the words"""
>         word_list = self.file_input_string.split()
>         average = sum(len(word) for word in word_list) / len(word_list)
>         return str(average)
>
>     def write_to_file(self, data):
>         """ collect all data for Morgen_Kinder.txt in a file"""
>         with open('data.json', 'w') as f:
>             json.dump(data, f, sort_keys=True, indent=4)
>
> #test
> x = Files('Morgen_Kinder.txt')
> a = Files.num_of_char(x)
> Files.write_to_file(x,a)
> print(a)
> b = Files.num_of_words(x)
> Files.write_to_file(x,b)
> print(b)
> c = Files.frequency_of_char(x)
> Files.write_to_file(x,c)
> d = Files.frequency_of_words(x)
> Files.write_to_file(x,d)
> e = Files.average_len_of_words(x)
> Files.write_to_file(x,e)
> print(e)
> g = Files.num_of_keystrokes(x)
> Files.write_to_file(x,g)
> print(g)
> _______________________________________________
> Tutor maillist  -  Tutor at python.org
> To unsubscribe or change subscription options:
> https://mail.python.org/mailman/listinfo/tutor