Using python script to search in multiple files and outputting an individual file for each one

StackOverflow https://stackoverflow.com/questions/17152916

  •  01-06-2022
  •  | 
  •  

Question

I am trying to get a program up and running that takes astronomical data files with the extension .fits and takes all of the files with that extension in a folder and searches for specific header information, and subsequently places it into a text folder corresponding to each file. I am using a while loop, and please forgive me if this code is badly formatted, it is my first time using python! My main problem is that I can only get the program to read one file before it closes itself.

#!/usr/bin/env python

#This code properly imports all '.fits' files in a specified directory and 
#outputs them into a .txt format that allows several headers and their contained
#data to be read.  

import copy
import sys
import pyfits
import string
import glob
import os.path
import fnmatch
import numpy as np

DIR = raw_input("Please input a valid directory : ") #-----> This prompts for input from the user to find the '.fits' files
os.chdir(DIR)

initialcheck = 0  #Initiates the global counter for the number of '.fits' files in the specified directory
targetcheck = 0   #Initiates the global counter for the amount of files that have been processed

def checkinitial(TD):
    #This counts the number of '.fits' files in your directory
    for files in glob.iglob('*.fits'):
        check = len(glob.glob1(TD,"*.fits"))
        global initialcheck
        initialcheck = check    
        if initialcheck == 0:
        print 'There are no .FITS files in this directory! Try Again...'
        sys.exit()
    return initialcheck
def sorter(TD, targcheck, inicheck):
    #This function will call the two counters and compare them until the number of processed files is greater than the files in the     #directory, thereby finishing the loop 
    global initialcheck
    inicheck = initialcheck
    global targetcheck
    targcheck = targetcheck
    while targcheck <= inicheck:
        os.walk(TD)
        for allfiles in glob.iglob('*.fits'):
        print allfiles #This prints out the filenames the porgram is currently processing
            with pyfits.open(allfiles) as HDU:
            #This block outlines all of the search terms in their respective headers, you will need to set the indices                  #below to search in the correct header for the specified term you are looking for, however no alterations to                    #the header definitions should be made.
            HDU_HD_0 = HDU[0].header
            HDU_HD_1 = HDU[1].header
            #HDU_HD_2 = HDU[2].header  -----> Not usually needed, can be activated if data from this header is required
            #HDU_HD_3 = HDU[3].header  -----> Use this if the '.fits' file contains a third header (unlikely but possible)
            KeplerIDIndex = HDU_HD_0.index('KEPLERID')
            ChannelIndex = HDU_HD_0.index('SKYGROUP')
            TTYPE1Index = HDU_HD_1.index('TTYPE1')
            TTYPE8Index = HDU_HD_1.index('TTYPE8')
            TTYPE9Index = HDU_HD_1.index('TTYPE9')
            TTYPE11Index = HDU_HD_1.index('TTYPE11')
            TTYPE12Index = HDU_HD_1.index('TTYPE12')
            TTYPE13Index = HDU_HD_1.index('TTYPE13')
            TTYPE14Index = HDU_HD_1.index('TTYPE14')
            TUNIT1Index = HDU_HD_1.index('TUNIT1')
            TUNIT8Index = HDU_HD_1.index('TUNIT8')
            TUNIT9Index = HDU_HD_1.index('TUNIT9')
            TUNIT11Index = HDU_HD_1.index('TUNIT11')
            TUNIT12Index = HDU_HD_1.index('TUNIT12')
            TUNIT13Index = HDU_HD_1.index('TUNIT13')
            TUNIT14Index = HDU_HD_1.index('TUNIT14')    
            #The below variables are an index search for the data found in the specified indices above, allowing the data                   #to be found in teh numpy array that '.fits' files use                      
            File_Data_KID = list( HDU_HD_0[i] for i in [KeplerIDIndex])
            File_Data_CHAN = list( HDU_HD_0[i] for i in [ChannelIndex])
            Astro_Data_1 = list( HDU_HD_1[i] for i in [TTYPE1Index])
            Astro_Data_8 = list( HDU_HD_1[i] for i in [TTYPE8Index])
            Astro_Data_9 = list( HDU_HD_1[i] for i in [TTYPE9Index])
            Astro_Data_11 = list( HDU_HD_1[i] for i in [TTYPE11Index])  
            Astro_Data_12 = list( HDU_HD_1[i] for i in [TTYPE12Index])
            Astro_Data_13 = list( HDU_HD_1[i] for i in [TTYPE13Index])
            Astro_Data_14 = list( HDU_HD_1[i] for i in [TTYPE14Index])
            Astro_Data_Unit_1 = list( HDU_HD_1[i] for i in [TUNIT1Index])
            Astro_Data_Unit_8 = list( HDU_HD_1[i] for i in [TUNIT8Index])
            Astro_Data_Unit_9 = list( HDU_HD_1[i] for i in [TUNIT9Index])
            Astro_Data_Unit_11 = list( HDU_HD_1[i] for i in [TUNIT11Index]) 
            Astro_Data_Unit_12 = list( HDU_HD_1[i] for i in [TUNIT12Index])
            Astro_Data_Unit_13 = list( HDU_HD_1[i] for i in [TUNIT13Index])
            Astro_Data_Unit_14 = list( HDU_HD_1[i] for i in [TUNIT14Index])
            HDU.close()
            with open('Processed ' + allfiles + ".txt", "w") as copy:
                targetcheck += 1
                Title1_Format = '{0}-----{1}'.format('Kepler I.D.','Channel')
                Title2_Format = '-{0}--------{1}------------{2}------------{3}------------{4}------------{5}-------------{6}-'.format('TTYPE1','TTYPE8','TTYPE9','TTYPE11','TTYPE12','TTYPE13','TTYPE14')
                File_Format = '{0}--------{1}'.format(File_Data_KID, File_Data_CHAN)
                Astro_Format = '{0}---{1}---{2}---{3}---{4}---{5}---{6}'.format(Astro_Data_1, Astro_Data_8, Astro_Data_9, Astro_Data_11, Astro_Data_12, Astro_Data_13, Astro_Data_14)
                Astro_Format_Units = '{0}  {1}  {2}  {3}  {4}   {5}   {6}'.format(Astro_Data_Unit_1, Astro_Data_Unit_8, Astro_Data_Unit_9, Astro_Data_Unit_11, Astro_Data_Unit_12, Astro_Data_Unit_13, Astro_Data_Unit_14)
                copy.writelines("%s\n" % Title1_Format)
                    copy.writelines( "%s\n" % File_Format)
                copy.writelines('\n')
                copy.writelines("%s\n" % Title2_Format)
                    copy.writelines( "%s\n" % Astro_Format)
                copy.writelines('\n')
                    copy.writelines( "%s\n" % Astro_Format_Units)   
                Results = copy          
            return Results

checkinitial(DIR)
sorter(DIR, targetcheck, initialcheck)
Was it helpful?

Solution

I think you keep getting confused between a single file and a list of files. Try something like this:

def checkinitial(TD):
    #This counts the number of '.fits' files in your directory
    check = len(glob.glob1(TD,"*.fits"))
    if not check:
        print 'There are no .FITS files in this directory! Try Again...'
        sys.exit()
return check

def sorter(TD, targcheck, inicheck):
    """This function will call the two counters and compare them until the number of processed
       files is greater than the files in the directory, thereby finishing the loop
    """ 
    for in_file in glob.iglob(os.path.join(TD,'*.fits')):
        print in_file  # This prints out the filenames the program is currently processing
        with pyfits.open(in_file) as HDU:
            # <Process input file HDU here>
        out_file_name = 'Processed_' + os.path.basename(in_file) + ".txt"
        with open(os.path.join(TD, out_file_name), "w") as copy:
            # <Write stuff to your output file copy here>
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top