Question

I am trying to run an EMR job via the Java SDK, but it does not launch at all.

Below is the code I am using.

I also looked at the documentation, but it was not of much help.

package com.zedo.aws.emr;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

public class ExampleEMR {

    /**
     * @param args
     */
    public static void main(String[] args) {

        AWSCredentials credentials = new BasicAWSCredentials("<my key>", "<my secret key>");
        AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient(credentials);

        StepFactory stepFactory = new StepFactory();

        StepConfig enableDebugging = new StepConfig()
            .withName("Enable Debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW")
            .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

        StepConfig installHive = new StepConfig()
            .withName("Install Hive")
            .withActionOnFailure("TERMINATE_JOB_FLOW")
            .withHadoopJarStep(stepFactory.newInstallHiveStep());

        StepConfig hiveScript = new StepConfig().withName("Hive Script")
            .withActionOnFailure("TERMINATE_JOB_FLOW")
            .withHadoopJarStep(stepFactory.newRunHiveScriptStep("s3://<path to script>"));

        RunJobFlowRequest request = new RunJobFlowRequest()
            .withName("Hive Interactive")
            .withSteps(enableDebugging, installHive)
            .withLogUri("s3://myawsbucket/")
            .withInstances(new JobFlowInstancesConfig()
                .withEc2KeyName("<my key>")
                .withHadoopVersion("0.20")
                .withInstanceCount(5)
                .withKeepJobFlowAliveWhenNoSteps(true)
                .withMasterInstanceType("m1.small")
                .withSlaveInstanceType("m1.small"));

        RunJobFlowResult result = emr.runJobFlow(request);

    }

}

Or can someone point me to some example links?


Solution 3

I solved this by correcting the secret key on my end.
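
For anyone hitting the same symptom: invalid credentials do not make the job silently fail to launch; the runJobFlow call itself throws. A minimal sketch of how to surface the error, assuming the same emr client and request as in the question:

import com.amazonaws.AmazonServiceException;

// Wrap the launch call to see why it was rejected; a wrong secret key
// typically surfaces as "SignatureDoesNotMatch" and a wrong access key
// as "InvalidClientTokenId".
try {
    RunJobFlowResult result = emr.runJobFlow(request);
    System.out.println("Started job flow: " + result.getJobFlowId());
} catch (AmazonServiceException e) {
    System.err.println("EMR rejected the request: " + e.getErrorCode()
            + " - " + e.getErrorMessage());
}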

OTHER TIPS

This works for me:

public void runScriptClientes(Calendar executionDate) {

    // create the AWS credentials
    BasicAWSCredentials awsCreds = new BasicAWSCredentials(rb.getString("awsAccessKey"),
            rb.getString("awsSecretKey"));

    // create the client to connect to EMR
    AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient(awsCreds);
    emr.setRegion(Region.getRegion(Regions.EU_WEST_1));

    // work out which folders to process
    Map<String, FolderS3> s3DataToProcessInput = getRutasInput(executionDate);

    for (Entry<String, FolderS3> bucket_ : s3DataToProcessInput.entrySet()){
        String nameBucket = bucket_.getKey();
        FolderS3 folderS3 = bucket_.getValue();
        // check whether the folder exists in the bucket
        if(folderS3.getExistInBucket()){
            listaConcurrente.add(folderS3);
            StepFactory stepFactory = new StepFactory();

            StepConfig stepHive = new StepConfig()
                    .withName(rb.getString("nameStepClientesS3")+":"+nameBucket) /* name of the step to run */
                    .withActionOnFailure(ActionOnFailure.CONTINUE) /* action to take if the step fails */
                    .withHadoopJarStep(
                            stepFactory.newRunHiveScriptStep(rb.getString("scriptClienteS3"), 
                                    "-d", "s3DataToProcess=s3://"+rb.getString("bucketPropio")+"/"+rb.getString("ruta_input_c1")+folderS3.getNameKey(),
                                    "-d", "s3DataToProcessOut=s3://"+rb.getString("bucketPropioOUT")+"/"+rb.getString("ruta_output_c1")+folderS3.getOutputFolder(),
                                    "-d", "windowTime=tablaparametro"));

            AddJobFlowStepsRequest jobFlow = new AddJobFlowStepsRequest().withJobFlowId(rb.getString("jobflowID"))
                    .withSteps(stepHive);

            // poll while the step state is PENDING or RUNNING
            AddJobFlowStepsResult result = emr.addJobFlowSteps(jobFlow);
            List<String> id = result.getStepIds();
            DescribeStepRequest describe = new DescribeStepRequest().withStepId(id.get(0));
            describe.setClusterId(rb.getString("jobflowID"));
            describe.setRequestCredentials(awsCreds); 
            DescribeStepResult res = emr.describeStep(describe);
            StepStatus status = res.getStep().getStatus();
            String stas = status.getState();

            while (stas.equals(StepExecutionState.PENDING.name()) || stas.equals(StepExecutionState.RUNNING.name())){
                try {
                    Thread.sleep(5000);
                    res = emr.describeStep(describe);
                    status = res.getStep().getStatus();
                    stas = status.getState();
                    log.info(stas);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }

            if (stas.equals(StepExecutionState.COMPLETED.name())) {
                folderS3.setProcessedInput(Boolean.TRUE);
                listaConcurrente.remove(folderS3);
                log.info("Step finished OK: " + folderS3);
            } else if (stas.equals(StepExecutionState.FAILED.name()) || stas.equals(StepExecutionState.CANCELLED.name())) {
                listaConcurrente.remove(folderS3);
                folderS3.setProcessedInput(Boolean.FALSE);
                listaConcurrente.add(folderS3);
                log.info("Step failed or was cancelled: " + folderS3);
            }

            // read the result data and load it into the DB

        }
    }
}
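
As an aside, on newer 1.x versions of the AWS SDK for Java the hand-rolled polling loop above can be replaced by the SDK's built-in step waiter, which blocks until the step completes and throws if it fails or is cancelled. A rough sketch, assuming the same emr client, cluster ID, step ID, and logging as above:

import com.amazonaws.waiters.WaiterParameters;
import com.amazonaws.waiters.WaiterUnrecoverableException;

DescribeStepRequest describe = new DescribeStepRequest()
        .withClusterId(rb.getString("jobflowID"))
        .withStepId(id.get(0));
try {
    // Polls DescribeStep internally until the step reaches COMPLETED.
    emr.waiters().stepComplete().run(new WaiterParameters<>(describe));
    log.info("Step finished OK: " + folderS3);
} catch (WaiterUnrecoverableException e) {
    // Thrown when the step ends up FAILED or CANCELLED.
    log.info("Step failed or was cancelled: " + folderS3);
}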

Below is a link you can refer to:

http://mpouttuclarke.wordpress.com/2011/06/24/how-to-run-an-elastic-mapreduce-job-using-the-java-sdk/

Note: some of the methods used above are deprecated. Refer to the AWS reference guide for the updated versions.
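
For example, the AmazonElasticMapReduceClient constructor and setRegion calls used above are deprecated in recent 1.x SDKs in favor of the client builder. A minimal sketch of the equivalent setup (the key values are placeholders):

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;

// Build the client through the builder instead of the deprecated
// constructor-plus-setRegion combination.
BasicAWSCredentials creds = new BasicAWSCredentials("<access key>", "<secret key>");
AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
        .withRegion(Regions.EU_WEST_1)
        .withCredentials(new AWSStaticCredentialsProvider(creds))
        .build();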
