Question

import argparse
import gzip
import dateutil.parser
import sys


def main():
userID = "UserID"
previous_timestamp = "2000-01-01 00:00:00"
t_value = 0


parser = argparse.ArgumentParser()
# deze regel is nodig om de file als een argument mee te geven

parser.add_argument("file") # file als argument

parser.add_argument("-t", action="store_true") # argument om te kijken of er wordt voldaan aan de threshold van 1800 seconden

parser.add_argument("-threshold") # een variable threshold, die standaard ook 1800 seconden is.

args = parser.parse_args()

f = gzip.open(args.file, 'rb') # open van de file vanuit een gzip formaat.



content = f.read().splitlines() # het uitlezen van de file en het splitten van de lines

for line in content:
    words = line.split("\t")
    ID = words[0]

    if not userID == ID: #als de userID's niet hetzelfde zijn, dan een --- lijn printen
        print '---------------------------------------'
        print line
        userID = ID
    else:
        print line

    if args.t or args.threshold: # als de gebruiker wil kijken naar patronen met behulp van een tijdthreshold
         if args.threshold:
             t_value = args.threshold
    else:
         t_value = 1800 # standaard threshold waarden

    t_value = int(t_value)

    timestamp = words[2]

    time_difference = dateutil.parser.parse(timestamp) - dateutil.parser.parse(previous_timestamp) # tijdverschil tussen queries berekenen
    if time_difference.seconds >= t_value: # als het verschil groter is dan de threshold, komt er een enter tussen
        print "\n"
        print line





f.close()

if __name__ == "__main__":
    main()

I get the error

TypeError: 'NoneType' object is not iterable

The full traceback is:

Traceback (most recent call last):
  File "opdracht1_2.py", line 68, in <module>
    main()
  File "opdracht1_2.py", line 54, in main
    time_difference = dateutil.parser.parse(timestamp) - dateutil.parser.parse(previous_timestamp) # tijdverschil tussen queries berekenen
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/python_dateutil-2.2-py2.7.egg/dateutil/parser.py", line 748, in parse
    return DEFAULTPARSER.parse(timestr, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/python_dateutil-2.2-py2.7.egg/dateutil/parser.py", line 310, in parse
    res, skipped_tokens = self._parse(timestr, **kwargs)
TypeError: 'NoneType' object is not iterable

Any ideas how to solve this?

Was it helpful?

Solution

You need to skip the first line of your file, it has a header.

Use the csv module to read your data more efficiently instead of reading it all into memory at once:

import csv

with gzip.open(args.file, 'rb') as f:
    reader = csv.reader(f, delimiter='\t')
    next(reader, None)  # skip first row of the file; negeer de eerste regel

    for words in reader:
        words = line.split("\t")

Using next() on the reader iterable reads one line from the file, which we ignore. If there are no lines in the file the function returns the default, None instead.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top