Rather than generating the TSV file manually, use the csv
module, which will take care of quoting any fields that contain literal tabs for you. The codecs
module can be used to automatically encode the text as UTF-8 as it is written to standard output.
import json
import sys
import csv
import codecs
def main():
    """Convert line-delimited JSON tweets on stdin into TSV rows on stdout.

    Each input line is parsed as one JSON document.  For every usable
    record a row of ``id_str``, ``user.screen_name`` and ``text`` is written
    through the csv module with a tab delimiter and encoded as UTF-8.

    The following inputs are skipped without producing a row:

    * lines that are not valid JSON (including blank lines),
    * rate-limit notices, i.e. objects carrying a ``limit`` key,
    * values that are not JSON objects, or objects lacking one of the
      three fields above.
    """
    # The codecs writer turns text into UTF-8 bytes, so it must sit on top
    # of a byte stream.  On Python 3 that is ``sys.stdout.buffer``; when the
    # attribute does not exist (e.g. Python 2) ``sys.stdout`` is wrapped
    # directly, as before.
    byte_stream = getattr(sys.stdout, 'buffer', sys.stdout)
    writer = csv.writer(codecs.getwriter('utf8')(byte_stream), delimiter="\t")

    for line in sys.stdin:
        try:
            tweet = json.loads(line.strip())
        except ValueError:
            # Malformed or empty line: nothing to emit.
            continue

        # Drop rate-limit notices and anything that is not a JSON object.
        if not isinstance(tweet, dict) or 'limit' in tweet:
            continue

        try:
            row = [
                tweet['id_str'],
                tweet['user']['screen_name'],
                tweet['text'],
            ]
        except (KeyError, TypeError):
            # Record without the tweet fields; skip it instead of aborting
            # the whole conversion.
            continue

        writer.writerow(row)
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()