Debugging Scala compiler's magic
Have you ever wondered how Scala is able to do some clever trick on top of the JVM? Or maybe you have asked yourself “WTF is wrong” while staring at a piece of code? That is what happened to me yesterday. I was trying to figure out some of the magic behind case classes and pattern matching. Even worse, I wanted to replicate some of it myself. After spending hours googling, I was ready to start decompiling classes generated by the Scala compiler to understand what exactly was going on there. But it turned out there is a simpler way to get into the internals.
The Scala compiler goes through multiple phases in the process of turning your beautiful source into bytecode. Here are the phases:
$ scala -Xshow-phases
namer, typer, superaccessors, pickler, refchecks, liftcode, uncurry, tailcalls, explicitouter, erasure, lazyvals, lambdalift, constructors, flatten, mixin, cleanup, icode, inliner, closelim, dce, jvm, sample-phase
You can find more information about each of them on this wiki page.
What is interesting for us now is that we can make the compiler print intermediate results between the phases. So we can see how the code evolves from its initial form to the final result.
To make the compiler print the syntax trees after a certain phase, add `-Xprint:<phase>` to the `scalac` command line. For example, it would be `-Xprint:namer` for the `namer` phase.
I created a simple test to experiment with the case class magic that was mentioned earlier. Here is the source code.
package net.yefremov.sample
// Minimal case class under study: wraps a single value typed as Any, so a pattern
// that expects a String or an Int has to test the runtime type of `value`.
case class Container(value: Any)
// Driver for the experiment: matches Container instances against typed patterns
// and prints which pattern applied.
object MatchingTest extends App {
// Prints a message chosen by the runtime type of the value held in `container`.
// Only String and Int payloads are handled; there is no default case, so any
// other payload ends in a MatchError.
def printType(container: Container): Unit = {
container match {
// Constructor pattern combined with a typed pattern: bind the field, then check its type.
case Container(stringValue: String) => println(s"It is a string: $stringValue")
case Container(intValue: Int) => println(s"It is an int: $intValue")
}
}
// Exercise both branches of the match.
printType(Container("Heya!"))
printType(Container(42))
}
What if it is an SBT project? How do I pass this `-Xprint` parameter to `scalac`? One way is to set `scalacOptions` in Build.scala/build.sbt.
That is not very convenient if you just want to check something quickly and move on. The other approach is to enable this feature only for the current sbt session:
$ sbt
> set scalacOptions ++=Seq("-Xprint:namer")
After that, when you run `compile`, you will get your syntax trees dumped to the console. Here is the output for my test after the `namer` phase.
[[syntax trees at end of namer]] // MatchingTest.scala
package net.yefremov.sample {
case class Container extends scala.Product with scala.Serializable {
<caseaccessor> <paramaccessor> val value: Any = _;
def <init>(value: Any) = {
super.<init>();
()
}
};
object MatchingTest extends App {
def <init>() = {
super.<init>();
()
};
def printType(container: Container): Unit = container match {
case Container((stringValue @ (_: String))) => println(StringContext("It is a string: ", "").s(stringValue))
case Container((intValue @ (_: Int))) => println(StringContext("It is an int: ", "").s(intValue))
};
printType(Container("Heya!"));
printType(Container(42))
}
}
And here is the output after the `lambdalift` phase.
[[syntax trees at end of lambdalift]] // MatchingTest.scala
package net.yefremov.sample {
case class Container extends Object with Product with Serializable {
<caseaccessor> <paramaccessor> private[this] val value: Object = _;
<stable> <caseaccessor> <accessor> <paramaccessor> def value(): Object = Container.this.value;
def <init>(value: Object): net.yefremov.sample.Container = {
Container.super.<init>();
Container.this.$asInstanceOf[Product$class]()./*Product$class*/$init$();
()
};
<synthetic> def copy(value: Object): net.yefremov.sample.Container = new net.yefremov.sample.Container(value);
<synthetic> def copy$default$1(): Object = Container.this.value();
override <synthetic> def productPrefix(): String = "Container";
<synthetic> def productArity(): Int = 1;
<synthetic> def productElement(x$1: Int): Object = {
case <synthetic> val x1: Int = x$1;
(x1: Int) match {
case 0 => Container.this.value()
case _ => throw new IndexOutOfBoundsException(scala.Int.box(x$1).toString())
}
};
override <synthetic> def productIterator(): Iterator = runtime.this.ScalaRunTime.typedProductIterator(Container.this);
<synthetic> def canEqual(x$1: Object): Boolean = x$1.$isInstanceOf[net.yefremov.sample.Container]();
override <synthetic> def hashCode(): Int = ScalaRunTime.this._hashCode(Container.this);
override <synthetic> def toString(): String = ScalaRunTime.this._toString(Container.this);
override <synthetic> def equals(x$1: Object): Boolean = Container.this.eq(x$1).||({
case <synthetic> val x1: Object = x$1;
case5(){
if (x1.$isInstanceOf[net.yefremov.sample.Container]())
matchEnd4(true)
else
case6()
};
case6(){
matchEnd4(false)
};
matchEnd4(x: Boolean){
x
}
}.&&({
<synthetic> val Container$1: net.yefremov.sample.Container = x$1.$asInstanceOf[net.yefremov.sample.Container]();
Container.this.value().==(Container$1.value()).&&(Container$1.canEqual(Container.this))
}))
};
<synthetic> object Container extends runtime.AbstractFunction1 with Serializable {
def <init>(): net.yefremov.sample.Container.type = {
Container.super.<init>();
()
};
final override <synthetic> def toString(): String = "Container";
case <synthetic> def apply(value: Object): net.yefremov.sample.Container = new net.yefremov.sample.Container(value);
case <synthetic> def unapply(x$0: net.yefremov.sample.Container): Option = if (x$0.==(null))
scala.this.None
else
new Some(x$0.value());
<synthetic> private def readResolve(): Object = sample.this.Container;
case <synthetic> <bridge> def apply(v1: Object): Object = Container.this.apply(v1)
};
object MatchingTest extends Object with App {
def <init>(): net.yefremov.sample.MatchingTest.type = {
MatchingTest.super.<init>();
MatchingTest.this.$asInstanceOf[App$class]()./*App$class*/$init$();
()
};
def printType(container: net.yefremov.sample.Container): Unit = {
case <synthetic> val x1: net.yefremov.sample.Container = container;
case6(){
if (x1.ne(null))
{
val stringValue: Object = x1.value();
if (stringValue.$isInstanceOf[String]())
{
<synthetic> val x2: String = (stringValue.$asInstanceOf[String](): String);
matchEnd5({
scala.this.Predef.println(new StringContext(scala.this.Predef.wrapRefArray(Array[String]{"It is a string: ", ""}.$asInstanceOf[Array[Object]]())).s(scala.this.Predef.genericWrapArray(Array[Object]{x2})));
scala.runtime.BoxedUnit.UNIT
})
}
else
case7()
}
else
case7()
};
case7(){
if (x1.ne(null))
{
val intValue: Object = x1.value();
if (intValue.$isInstanceOf[Int]())
{
<synthetic> val x3: Int = (scala.Int.unbox(intValue): Int);
matchEnd5({
scala.this.Predef.println(new StringContext(scala.this.Predef.wrapRefArray(Array[String]{"It is an int: ", ""}.$asInstanceOf[Array[Object]]())).s(scala.this.Predef.genericWrapArray(Array[Object]{scala.Int.box(x3)})));
scala.runtime.BoxedUnit.UNIT
})
}
else
case8()
}
else
case8()
};
case8(){
matchEnd5(throw new MatchError(x1))
};
matchEnd5(x: runtime.BoxedUnit){
()
}
};
MatchingTest.this.printType(new net.yefremov.sample.Container("Heya!"));
MatchingTest.this.printType(new net.yefremov.sample.Container(scala.Int.box(42)))
}
}
There is a lot of stuff going on behind the scenes! Here are a couple of things related to pattern matching that we can learn from this output:
- There is an `unapply` method generated for case classes. Nothing surprising here.
def unapply(x$0: net.yefremov.sample.Container): Option = if (x$0.==(null)) scala.this.None else new Some(x$0.value());
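- The generated `unapply` method is not used for pattern matching. Instead, the compiler generates more efficient code for case class pattern matching: in the `printType` output above, it checks the value against `null`, reads the field directly through the `value()` accessor, and tests its type with `$isInstanceOf`, without ever allocating an `Option`.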
I hope that will be helpful for someone exploring Scala magic.