I have a tab-delimited text file that is formatted as such:

TITLE   FSAL
Professor   "30,000.00"
Professor   0
Staff   "90,000.00"
Professor   "50,000.00"
Manager "80,000.00"
Professor   "40,000.00"

My goal is to rank the job titles by the highest average salary. The code that I have so far is:

from collections import Counter
job_file = open("jobs.txt", "r")

headers = job_file.readline()
titles = []
salaries = []

for line in job_file.readlines():
  line.rstrip()
  (title, fsal) = line.split('\t')
  #convert fsal from string to float,
  fsal = float(fsal.replace('"', '').replace(',', ''))

  titles.append(title)
  salaries.append(fsal)

#Average salary for all titles
avg_salary = sum(salaries)/len(titles)
print "Average salary for all titles = ", avg_salary
#Average salary for all titles =  48333.3333333

What can I do to output the average salary by job title so that I can use .sort() to rank them by highest average salary, i.e.:

average salary for Professor = 30000.0  #(30000.0 + 0.0 + 50000.0 + 40000.0)/4
average salary for Staff = 90000.0  #90000.0/1
average salary for Manager = 80000.0  #80000.0/1

The final output should look like this:

1. Staff 90000.0
2. Manager 80000.0
3. Professor 30000.0
有帮助吗?

解决方案

#!/usr/bin/python
import collections
Job = collections.namedtuple('Job', ['title', 'salary'])
all_jobs = []
with open("jobs.txt", "r") as job_file:
    headers = job_file.readline()
    for line in job_file:
        (title, fsal) = line.rstrip().split('\t')
        fsal = float(fsal.replace('"', '').replace(',', ''))
        all_jobs.append(Job(title, fsal))
avg_salary = sum(
    job.salary for job in all_jobs) / len(all_jobs)
print "Average salary for all titles = ", avg_salary
sorted_jobs = sorted(
    all_jobs, key=lambda job: job.salary, reverse=True)
for i, j in enumerate(sorted_jobs):
    print '%i. %s \t%g' % (i + 1, j.title, j.salary)

Edited: the OP wanted averages for each job title:

#!/usr/bin/python
import collections
Job = collections.namedtuple('Job', ['title', 'salary'])
salaries = {}
with open("jobs.txt", "r") as job_file:
    headers = job_file.readline()
    for line in job_file:
        (title, fsal) = line.rstrip().split('\t')
        fsal = float(fsal.replace('"', '').replace(',', ''))
        if title in salaries:
            salaries[title].append(fsal)
        else:
            salaries[title] = [fsal]
average = lambda alist: sum(alist) / len(alist)
average_salaries = [Job(title, average(salaries[title]))
                    for title in salaries]
sorted_salaries = sorted(
    average_salaries, key=lambda job: job.salary, reverse=True)
for i, j in enumerate(sorted_salaries):
    print '%i. %s \t%g' % (i + 1, j.title, j.salary)
许可以下: CC-BY-SA归因
不隶属于 StackOverflow
scroll top