Get heading and pitch from pixels on Street View

Question 1

TL;DR: JavaScript code for a proof of concept included at the end of this answer.

The heading and pitch parameters h0 and p0 of the panorama image correspond to a direction. By using the focal length f of the camera to scale this direction vector, one can get the 3D coordinates (x0, y0, z0) of the viewport center at (u0, v0):

x0 = f * cos( p0 ) * sin( h0 )
y0 = f * cos( p0 ) * cos( h0 )
z0 = f * sin( p0 )

The goal is now to find the 3D coordinates of the point corresponding to some given pixel coordinates (u, v) in the image. First, map these pixel coordinates to pixel offsets (du, dv) (to the right and to the top) from the viewport center:

du = u - u0 = u - w / 2
dv = v0 - v = h / 2 - v

Then a local orthonormal 2D basis of the viewport in 3D has to be found. The unit vector (ux, uy, uz) supports the x-axis (to the right along the direction of increasing headings) and the vector (vx, vy, vz) supports the y-axis (to the top along the direction of increasing pitches) of the image. Once these two vectors are determined, the 3D coordinates of the point on the viewport matching the (du, dv) pixel offset in the viewport are simply:

x = x0 + du * ux + dv * vx
y = y0 + du * uy + dv * vy
z = z0 + du * uz + dv * vz

And the heading and pitch parameters h and p for this point are then:

R = sqrt( x * x + y * y + z * z )
h = atan2( x, y )
p = asin( z / R )

Finally, to get the two unit vectors (ux, uy, uz) and (vx, vy, vz), compute the partial derivatives of the spherical coordinates with respect to the heading and pitch parameters at (h0, p0) and normalize them (the normalization is where the sgn factor below comes from); one should get:

vx = -sin( p0 ) * sin ( h0 )
vy = -sin( p0 ) * cos ( h0 )
vz =  cos( p0 ) 

ux =  sgn( cos ( p0 ) ) * cos( h0 )
uy = -sgn( cos ( p0 ) ) * sin( h0 )
uz = 0

where sgn( a ) is +1 if a >= 0 else -1.

Complements:

The focal length is derived from the horizontal field of view and the width of the image:
```
f = (w / 2) / Math.tan(fov / 2)
```
The reverse mapping from heading and pitch parameters to pixel coordinates can be done similarly:
1. Find the 3D coordinates (x, y, z) of the direction of the ray corresponding to the specified heading and pitch parameters,
2. Find the 3D coordinates (x0, y0, z0) of the direction of the ray corresponding to the viewport center (an associated image plane is located at (x0, y0, z0) with an (x0, y0, z0) normal),
3. Intersect the ray for the specified heading and pitch parameters with the image plane, this gives the 3D offset from the viewport center,
4. Project this 3D offset on the local basis, getting the 2D offsets du and dv
5. Map du and dv to absolute pixel coordinates.
In practice, this approach seems to work similarly well on both square and rectangular viewports.

Proof of concept code (call the onLoad() function on a web page containing a sized canvas element with a "panorama" id)

'use strict';

// Page-wide PanoramaViewer instance: assigned by onLoad() and read by
// onClick() when a click has to be forwarded.
var viewer;

/**
 * Forwards a DOM click event to the page-wide `viewer` instance.
 *
 * @param {Event} e - The click event to hand over to `viewer.click()`.
 */
function onClick(e) {
  var target = viewer;

  target.click(e);
}

/**
 * Page entry point: builds a PanoramaViewer on the element whose id is
 * "panorama", stores it in the global `viewer`, and renders it once.
 */
function onLoad() {
  viewer = new PanoramaViewer(document.getElementById("panorama"));
  viewer.update();
}

/**
 * Viewer state for a Street View image displayed behind a canvas element.
 *
 * Records the element and its pixel size, starts at a heading and pitch of
 * 0 degrees, and listens for clicks on the element.
 *
 * @param {HTMLCanvasElement} element - Sized canvas the viewer is bound to.
 */
function PanoramaViewer(element) {
  var self = this;

  this.element = element;
  this.width = element.width;
  this.height = element.height;
  this.pitch = 0;
  this.heading = 0;

  // Dispatch to this instance instead of going through a page-wide global,
  // so that every viewer handles the clicks made on its own element.
  // `click` is looked up when the event fires, not at construction time.
  element.addEventListener("click", function(e) {
    self.click(e);
  }, false);
}

// Horizontal field of view of the requested image, in degrees.
PanoramaViewer.FOV = 90;

/**
 * Builds the Street View image URL for the current viewer state.
 *
 * @returns {string} Request URL carrying the fixed location plus the
 *   viewer's size, field of view, heading and pitch.
 */
PanoramaViewer.prototype.makeUrl = function() {
  var parts = [
    "https://maps.googleapis.com/maps/api/streetview?location=40.457375,-80.009353",
    "size=" + this.width + "x" + this.height,
    "fov=" + PanoramaViewer.FOV,
    "heading=" + this.heading,
    "pitch=" + this.pitch
  ];

  return parts.join("&");
};

/**
 * Refreshes the display: points the element's CSS background at the image
 * URL for the current state, then strokes a yellow crosshair (one
 * horizontal and one vertical line) through the center of the canvas.
 */
PanoramaViewer.prototype.update = function() {
  var canvas = this.element;

  canvas.style.backgroundImage = "url(" + this.makeUrl() + ")";

  var w = this.width;
  var h = this.height;
  var ctx = canvas.getContext('2d');

  // Strokes a single straight segment as its own path.
  function strokeSegment(fromX, fromY, toX, toY) {
    ctx.beginPath();
    ctx.moveTo(fromX, fromY);
    ctx.lineTo(toX, toY);
    ctx.stroke();
  }

  ctx.strokeStyle = '#FFFF00';

  strokeSegment(0, h / 2, w, h / 2);
  strokeSegment(w / 2, 0, w / 2, h);
};

/**
 * Two-valued sign function.
 *
 * @param {number} x - Value to test.
 * @returns {number} 1 when `x >= 0` holds, -1 otherwise (so zero maps to 1
 *   and NaN maps to -1).
 */
function sgn(x) {
  if (x >= 0) {
    return 1;
  }
  return -1;
}

/**
 * Converts a heading/pitch direction into pixel coordinates of the current
 * viewport (the reverse of map()).
 *
 * The direction is turned into a ray from the origin, intersected with the
 * image plane that sits at distance f (the focal length in pixels) along
 * the viewport's central direction, and the intersection is projected on
 * the plane's local (right, up) basis.
 *
 * @param {number} heading - Heading of the direction, in degrees.
 * @param {number} pitch - Pitch of the direction, in degrees.
 * @returns {{u: number, v: number}} Pixel coordinates, u growing to the
 *   right and v growing downwards from the top-left corner. Both are NaN
 *   when the direction does not point towards the image plane (its dot
 *   product with the central direction is not positive): such a direction
 *   has no projection on the viewport.
 */
PanoramaViewer.prototype.unmap = function(heading, pitch) {
  var PI = Math.PI;
  var cos = Math.cos;
  var sin = Math.sin;
  var tan = Math.tan;

  var fov = PanoramaViewer.FOV * PI / 180.0;
  var width = this.width;
  var height = this.height;

  // Focal length in pixels, from the horizontal field of view.
  var f = 0.5 * width / tan(0.5 * fov);

  var h = heading * PI / 180.0;
  var p = pitch * PI / 180.0;

  // Requested direction, scaled by f.
  var x = f * cos(p) * sin(h);
  var y = f * cos(p) * cos(h);
  var z = f * sin(p);

  var h0 = this.heading * PI / 180.0;
  var p0 = this.pitch * PI / 180.0;

  // Viewport center M0; it is also the normal of the image plane.
  var x0 = f * cos(p0) * sin(h0);
  var y0 = f * cos(p0) * cos(h0);
  var z0 = f * sin(p0);

  //
  // Intersect the ray O, v = (x, y, z)
  // with the plane at M0 of normal n = (x0, y0, z0)
  //
  //   n . (O + t v - M0) = 0
  //   t n . v = n . M0 = f^2
  //
  var dot = x0 * x + y0 * y + z0 * z;

  // A non-positive dot product means the ray runs parallel to the plane or
  // away from it. Solving for t anyway would give an infinite or negative
  // value, and a negative t mirrors the point through the origin onto the
  // viewport (the direction straight behind the camera would land on the
  // viewport center), so report "no projection" instead.
  if (!(dot > 0)) {
    return {
      u: NaN,
      v: NaN
    };
  }

  var t = f * f / dot;

  // Unit vector along increasing headings (to the right in the image).
  var ux = sgn(cos(p0)) * cos(h0);
  var uy = -sgn(cos(p0)) * sin(h0);
  var uz = 0;

  // Unit vector along increasing pitches (to the top in the image).
  var vx = -sin(p0) * sin(h0);
  var vy = -sin(p0) * cos(h0);
  var vz = cos(p0);

  // Intersection point on the image plane.
  var x1 = t * x;
  var y1 = t * y;
  var z1 = t * z;

  var dx10 = x1 - x0;
  var dy10 = y1 - y0;
  var dz10 = z1 - z0;

  // Project on the local basis (u, v) at M0
  var du = ux * dx10 + uy * dy10 + uz * dz10;
  var dv = vx * dx10 + vy * dy10 + vz * dz10;

  // dv points up while pixel rows grow downwards, hence the subtraction.
  return {
    u: du + width / 2,
    v: height / 2 - dv
  };
};

/**
 * Converts pixel coordinates of the current viewport into the heading and
 * pitch of the matching direction.
 *
 * The pixel is expressed as an offset from the viewport center, moved into
 * 3D along the image plane's local (right, up) basis, and the resulting
 * point is converted back to spherical angles.
 *
 * @param {number} u - Horizontal pixel coordinate, growing to the right.
 * @param {number} v - Vertical pixel coordinate, growing downwards.
 * @returns {{heading: number, pitch: number}} Direction in degrees.
 */
PanoramaViewer.prototype.map = function(u, v) {
  function toRadians(degrees) {
    return degrees * Math.PI / 180.0;
  }

  function toDegrees(radians) {
    return radians * 180.0 / Math.PI;
  }

  // Focal length in pixels, from the horizontal field of view.
  var fieldOfView = toRadians(PanoramaViewer.FOV);
  var focal = 0.5 * this.width / Math.tan(0.5 * fieldOfView);

  var centerHeading = toRadians(this.heading);
  var centerPitch = toRadians(this.pitch);

  var sinH = Math.sin(centerHeading);
  var cosH = Math.cos(centerHeading);
  var sinP = Math.sin(centerPitch);
  var cosP = Math.cos(centerPitch);
  var side = sgn(cosP);

  // Viewport center, then the unit vectors pointing right and up from it.
  var center = [focal * cosP * sinH, focal * cosP * cosH, focal * sinP];
  var right = [side * cosH, -side * sinH, 0];
  var up = [-sinP * sinH, -sinP * cosH, cosP];

  // Offsets from the center; rows grow downwards, so flip the vertical one.
  var offsetRight = u - this.width / 2;
  var offsetUp = this.height / 2 - v;

  function coordinate(axis) {
    return center[axis] + offsetRight * right[axis] + offsetUp * up[axis];
  }

  var px = coordinate(0);
  var py = coordinate(1);
  var pz = coordinate(2);

  var norm = Math.sqrt(px * px + py * py + pz * pz);

  return {
    heading: toDegrees(Math.atan2(px, py)),
    pitch: toDegrees(Math.asin(pz / norm))
  };
};

/**
 * Click handler: re-centers the view on the clicked pixel.
 *
 * Logs the pixel computed by unmap() for the current heading/pitch, maps
 * the clicked pixel to a heading/pitch, logs that together with its
 * round trip through unmap(), then stores the new heading/pitch and
 * refreshes the display.
 *
 * @param {MouseEvent} e - Click event whose target is the viewer element.
 */
PanoramaViewer.prototype.click = function(e) {
  var bounds = e.target.getBoundingClientRect();
  var clickU = e.clientX - bounds.left;
  var clickV = e.clientY - bounds.top;

  var pixel = this.unmap(this.heading, this.pitch);

  console.log("current viewport center");
  console.log("  heading: " + this.heading);
  console.log("  pitch: " + this.pitch);
  console.log("  u: " + pixel.u);
  console.log("  v: " + pixel.v);

  var direction = this.map(clickU, clickV);

  // Round trip, to compare against the clicked pixel in the log.
  pixel = this.unmap(direction.heading, direction.pitch);

  console.log("click at (" + clickU + "," + clickV + ")");
  console.log("  heading: " + direction.heading);
  console.log("  pitch: " + direction.pitch);
  console.log("  u: " + pixel.u);
  console.log("  v: " + pixel.v);

  this.heading = direction.heading;
  this.pitch = direction.pitch;
  this.update();
};

Question 2

This answer is imprecise; have a look at the most recent answer by user3146587.

I'm not very good at mathematical explanations. I've coded an example and tried to explain the steps in the code. As soon as you click on one point in the image, this point becomes the new center of the image. Even though you did not explicitly ask for this, it is perfect for illustrating the effect. The new image is drawn with the previously calculated angle.

Example: JSFiddle

The important part is that I use the radian (here meaning the arc length, in pixels) to calculate the radius of the "sphere of view". The radian in this case is the width of the image (in your example, 100).

radius = radian / FOV

With the radian, the radius and the relative mouse position, I can calculate the angle between the center and the mouse position.

Center(50,50)
MousePosition(75,25)
RelativeMousePosition(25,-25)

When the relative mouse position is 25 the radian used for the calculation of the horizontal angle is 50.

radius = 50 / FOV // we've calculated the radius before, it stays the same

See this image for the further process: enter image description here

I can calculate the new heading and pitch when I add/subtract the calculated angle to the actual angle (depending on left/right, above/under). See the linked JSFiddle for the correct behavior of this.

Doing the reverse is simple, just do the listed steps in the opposite direction (the radius stays the same).

As I've already mentioned, I'm not very good at mathematical explanations, but don't hesitate to ask questions in the comments.

Question 3

Here is an attempt to give a mathematical derivation of the answer to your question.

Note: Unfortunately, this derivation only works in 1D and the conversion from a pair of angular deviations to heading and pitch is wrong.

Notations:

f: focal length of the camera
h: height in pixels of the viewport
w: width in pixels of the viewport
dy: vertical deviation in pixels from the center of the viewport
dx: horizontal deviation in pixels from the center of the viewport
fov_y: vertical field of view
fov_x: horizontal field of view
dtheta_y: relative vertical angle from the center of the viewport
dtheta_x: relative horizontal angle from the center of the viewport

Given dy, the vertical offset of the pixel from the center of the viewport (this pixel corresponds to the green ray on the figure), we are trying to find dtheta_y (the red angle), the relative vertical angle from the center of the viewport (the pitch of the center of the viewport is known to be theta_y0).

Notations

From the figure, we have:

tan( fov_y / 2 ) = ( h / 2 ) / f

tan( dtheta_y ) = dy / f

so:

tan( dtheta_y ) = dy / ( ( h / 2 ) / tan( fov_y / 2 ) )
                = 2 * dy * tan( fov_y /  2 ) / h

and finally:

dtheta_y = atan( 2 * dy * tan( fov_y / 2 ) / h )

This is the relative pitch angle for the pixel at dy from the center of the viewport, simply add to it the pitch angle at the center of the viewport to get the absolute pitch angle (i.e. theta_y = theta_y0 + dtheta_y).

similarly:

dtheta_x = atan( 2 * dx * tan( fov_x / 2 ) / w )

This is the relative heading angle for the pixel at dx from the center of the viewport.

Complements:

Both relations can be inverted to get the mapping from relative heading / pitch angle to relative pixel coordinates, for instance:
```
dy = h * tan( dtheta_y ) / ( 2 * tan( fov_y / 2 ) )
```

The vertical and horizontal fields of view fov_y and fov_x are linked by the relation:

w / h = tan( fov_x / 2 ) / tan( fov_y / 2 )

so:

fov_x = 2 * atan( w * tan( fov_y / 2 ) / h )

The vertical and horizontal deviations from the viewport center dy and dx can be mapped to absolute pixel coordinates:
```
x = w / 2 + dx
y = h / 2 - dy
```
Proof of concept fiddle

Question 4

Martin Matysiak wrote a JS library that implements the inverse of this (placing a marker at a specific heading/pitch). I mention this as the various jsfiddle links in other answers are 404ing, the original requestor added a comment requesting this, and this SO page comes up near the top for related searches.

The blog post discussing it is at https://martinmatysiak.de/blog/view/panomarker.

The library itself is at https://github.com/marmat/google-maps-api-addons.

There's documentation and demos at http://marmat.github.io/google-maps-api-addons/ (look at http://marmat.github.io/google-maps-api-addons/panomarker/examples/basic.html and http://marmat.github.io/google-maps-api-addons/panomarker/examples/fancy.html for the PanoMarker examples).