غير معروف دول مجلس التعاون الخليجي خطأ أثناء ترجمة ذراع النيون (الحرجة)

https://stackoverflow.com/questions/3811148

26-09-2019
|

سؤال

لدي ذراع النيون Cortex-A8 ومعالج يستند الهدف.كنت تحسين قانون بلدي من خلال الاستفادة من النيون.ولكن عندما تجمع قانون بلدي أحصل على هذا الخطأ غريب.لا أعرف كيفية إصلاح هذا.

أحاول تجميع التعليمات البرمجية التالية (جزء 1) باستخدام رمز Sourcery (PART2) على المضيف.وأنا الحصول على هذا الخطأ غريب (PART3).أفعل شيئا خطأ هنا ؟ يمكن لأي شخص آخر ترجمة هذا وانظر إذا أنها أيضا الحصول على نفس تجميع الخطأ ؟

الجزء الغريب هو في التعليمات البرمجية إذا كنت التعليق الخروج else if(step_size == 4) جزء من التعليمات البرمجية ، ثم الخطأ يختفي.ولكن للأسف بلدي الأمثل ليست كاملة مع ذلك ، لذا يجب أن يكون عليه.

في البداية اعتقدت انها مشكلة مع CodeSourcey مترجم (على بلدي المضيف) لذا جمعت البرنامج على الهدف مباشرة (هدفي يعمل على أوبونتو).اعتدت دول مجلس التعاون الخليجي إلى هناك مرة أخرى, لقد تحصل على نفس الخطأ عندما التعليق الخروج else if(step_size == 4) جزء, ثم الخطأ يختفي.

مساعدة!

الجزء 1

#include<stdio.h>
#include"arm_neon.h"

#define IMAGE_HEIGHT 480
#define IMAGE_WIDTH  640

float32_t integral_image[IMAGE_HEIGHT][IMAGE_WIDTH];

float32x4_t box_area_compute3(int, int , int , int , unsigned int , float);

inline int min(int, int);

int main()
{

 box_area_compute3(1, 1, 4, 4, 2, 0);

 return 0;
}

float32x4_t box_area_compute3(int row, int col, int num_rows, int num_cols, unsigned int step_size, float three)
{
 unsigned int height = IMAGE_HEIGHT;
 unsigned int width = IMAGE_WIDTH;

 int temp_row = row + num_rows;
 int temp_col = col + num_cols;

 int r1 = (min(row, height))- 1 ;
 int r2 = (min(temp_row, height)) - 1;

 int c1 = (min(col, width)) - 1;
 int c2 = (min(temp_col, width)) - 1;

 float32x4_t v128_areas;

 if(step_size == 2)
 {
  float32x4x2_t top_left, top_right, bottom_left, bottom_right;
  top_left    = vld2q_f32((float32_t *)integral_image[r1] + c1);
  top_right   = vld2q_f32((float32_t *)integral_image[r1] + c2);
  bottom_left  = vld2q_f32((float32_t *)integral_image[r2] + c1);
  bottom_right  = vld2q_f32((float32_t *)integral_image[r2] + c2);

  v128_areas = vsubq_f32(vsubq_f32(vaddq_f32(top_left.val[0], bottom_right.val[0]), top_right.val[0]), bottom_left.val[0]);


 }
 else if(step_size == 4)
 {
  float32x4x4_t top_left, top_right, bottom_left, bottom_right;
  top_left   = vld4q_f32((float32_t *)integral_image[r1] + c1);
  top_right   = vld4q_f32((float32_t *)integral_image[r1] + c2);
  bottom_left  = vld4q_f32((float32_t *)integral_image[r2] + c1);
  bottom_right  = vld4q_f32((float32_t *)integral_image[r2] + c2);

  v128_areas = vsubq_f32(vsubq_f32(vaddq_f32(top_left.val[0], bottom_right.val[0]), top_right.val[0]), bottom_left.val[0]);

 }

 if(three == 3.0)
  v128_areas = vmulq_n_f32(v128_areas, three);

 return v128_areas;

}

inline int min(int X, int Y)
{
 return (X < Y ? X : Y);
}

الجزء 2

arm-none-linux-gnueabi-gcc -O0 -g3 -Wall -c -fmessage-length=0 -fcommon -MMD -MP -MF"main.d" -MT"main.d" -mcpu=cortex-a8 -marm -mfloat-abi=hard -mfpu=neon-vfpv4 -o"main.o" "../main.c"

الجزء 3

../main.c: In function 'box_area_compute3':
../main.c:65: error: unable to find a register to spill in class 'GENERAL_REGS'
../main.c:65: error: this is the insn:
(insn 226 225 227 5 c:\program files\codesourcery\sourcery g++\bin\../lib/gcc/arm-none-linux-gnueabi/4.4.1/include/arm_neon.h:9863 (parallel [
           (set (reg:XI 148 [ D.17028 ])
               (unspec:XI [
                       (mem:XI (reg:SI 3 r3 [301]) [0 S64 A64])
                       (reg:XI 148 [ D.17028 ])
                       (unspec:V4SF [
                               (const_int 0 [0x0])
                           ] 191)
                   ] 111))
           (set (reg:SI 3 r3 [301])
               (plus:SI (reg:SI 3 r3 [301])
                   (const_int 32 [0x20])))
       ]) 1605 {neon_vld4qav4sf} (nil))
../main.c:65: confused by earlier errors, bailing out
cs-make: *** [main.o] Error 1

المحلول 2

حسنا أنا اتصلت رمز Sourcery حول هذه المشكلة وأنها تعتبر هذا الخلل في دول مجلس التعاون الخليجي مترجم.لذا كتبت do_it4(){.....} وظيفة في الجمعية بدلا من استخدام تيه إينترينسيكس.الآن يعمل جيدا!

نصائح أخرى

لا أستطيع اختبار هذا لأنني لا أملك toolchain ، ولكن هذا النوع من الخطأ في كثير من الأحيان يكون عمل حول صياغة قانون قليلا.عموما هذا لا ينبغي أن يحدث ، وينبغي الإبلاغ عن الخلل, ولكن كنت تستخدم معالج وظائف محددة ، والتي ربما أقل أيضا اختبار مصقول من بقية مترجم.

لأنه سجل تسرب خطأ و كنت قد حصلت على عدة مؤشرات المشاركة أني أشك أن المترجم قد تحاول تحميل المزيد من البيانات في سجلات مما يجب أن خوفا من أن قد يكون هناك بعض التعرج يحدث (التي ربما لا يحدث في الواقع).أدناه سوف نتعامل مع إمكانية ذلك وكذلك فعل عدد قليل من الأشياء الأخرى التي قد تقلل من تعقيد رمز من المترجم منظور (على الرغم من أنه قد لا يبدو هذا هو الحال).

#include<stdio.h>
#include"arm_neon.h"

#define IMAGE_HEIGHT 480
#define IMAGE_WIDTH  640

float32_t integral_image[IMAGE_HEIGHT][IMAGE_WIDTH];

float32x4_t box_area_compute3(int, int , int , int , unsigned int , float);

inline int min(int, int);

int main()
{

 box_area_compute3(1, 1, 4, 4, 2, 0);

 return 0;
}

/* By putting these in separate functions the compiler will initially
 * think about them by themselves, without the complications of the
 * surrounding code.  This may give it the abiltiy to optimise the
 * code somewhat before trying to inline it.
 * This may also serve to make it more obvious to the compiler that
 * the local variables are dead after their use (since they are
 * dead after the call returns, and that the lifetimes of some variable
 * cannot actually overlap (hopefully reducing the register needs).
 */
static inline float32x4_t do_it2(float32_t *tl, float32_t *tr, float32_t *bl, float32_t * br) {
    float32x4x2_t top_left, top_right, bottom_left, bottom_right;
    float32x4_t A, B;

    top_left = vld2q_f32(tl);
    top_right = vld2q_f32(tr);
    bottom_left = vld2q_f32(bl);
    bottom_right = vld2q_f32(br);

    /* By spreading this across several statements I have created several
     * additional sequence points.  The compiler does not think that it
     * has to dereference all of the pointers before doing any of the
     * computations.... maybe. */
    A = vaddq_f32(*top_left.val, *bottom_right.val);
    B = vsubq_f32(A, *top_right.val);
    return vsubq_f32(B, *bottom_left);
}

static inline float32x4_t do_it4(float32_t *tl, float32_t *tr, float32_t *bl, float32_t * br) {
    float32x4x4_t top_left, top_right, bottom_left, bottom_right;
    float32x4_t A, B;

    top_left = vld4q_f32(tl);
    top_right = vld4q_f32(tr);
    bottom_left = vld4q_f32(bl);
    bottom_right = vld4q_f32(br);

    A = vaddq_f32(*top_left.val, *bottom_right.val);
    B = vsubq_f32(A, *top_right.val);
    return vsubq_f32(B, *bottom_left);
}

float32x4_t box_area_compute3(int row, int col, int num_rows, int num_cols, unsigned int step_size, float three)
{
 unsigned int height = IMAGE_HEIGHT;
 unsigned int width = IMAGE_WIDTH;

 int temp_row = row + num_rows;
 int temp_col = col + num_cols;

 int r1 = (min(row, height))- 1 ;
 int r2 = (min(temp_row, height)) - 1;

 int c1 = (min(col, width)) - 1;
 int c2 = (min(temp_col, width)) - 1;

 float32x4_t v128_areas;

     float32_t *tl = (float32_t *)integral_image[r1] + c1;
 float32_t *tr = (float32_t *)integral_image[r1] + c2;
 float32_t *bl = (float32_t *)integral_image[r2] + c1;
 float32_t *br = (float32_t *)integral_image[r2] + c2;


 switch (step_size) {
    case 2:
      v128_areas = do_it2(tl, tr, bl, br);
      break;

 case 4:
      v128_areas = do_it4(tl, tr, bl, br);
      break;
 }

 if(three == 3.0)
  v128_areas = vmulq_n_f32(v128_areas, three);

 return v128_areas;

}

inline int min(int X, int Y)
{
 return (X < Y ? X : Y);
}

آمل أن يساعد هذا و أنا لم أعرض أي أخطاء.

الخط:

float32x4x4_t top_left, top_right, bottom_left, bottom_right;

يستخدم كل 16 س السجلات!فإنه ليس من المستغرب جدا أن المترجم لا يمكن التعامل مع هذا.ربما يمكن أن يكون ثابت هذا عن طريق إعادة الكتابة إلى استخدام عدد أقل من السجلات.

الذراع النيون Cortex-A8 يكون vfpv3 الدعم ، Cortex-A5 يكون vfpv4 و neon2 الدعم (كما عن:إذا كنت تستخدم -mfloat-أبي=صعوبة تخطي القدرة على محاكاة البرمجيات في عداد المفقودين التعليمات بحيث لا يمكن توليد التعليمات البرمجية التي من شأنها أن تكون الأمثل vfpv4 ولكن سوف تعمل على vfpv3 مع محاكاة البرمجيات)

مرخصة بموجب: CC-BY-SA مع الإسناد

لا تنتمي إلى StackOverflow