这里主要是看书的笔记。从基础开始。不断记录。直到啃完这本书(android软件安全权威指南:丰生强)

常见的android文件格式

1、jar包

就是一个zip格式的压缩包,里面是编译后的java字节码class文件集合。所以jar文件常叫作jar包。有些安全较高的jar包会对包含的class文件进行签名。并且把签名信息保存在META-INF目录。静态分析可以使用jd-gui。动态分析可以使用AspectJ。

2、aar包

aar是android studio的全新的库文件格式,除了可以包含代码,还可以包含任何开发中使用的资源数据。实际上也是一个zip包格式。目录结构和apk文件类似。

3、apk文件结构

AndroidManifest.xml:编译好的AXML二进制格式文件。

META-INF:apk的签名信息

classes.dex:程序的可执行代码。开启MultiDex后会有多个dex文件(classes2.dex、classes3.dex……)

res:程序中使用的资源文件

resources.arsc:编译好的二进制格式的资源信息

assets:如果使用asset系统来存放raw资源。所有资源都将保存在这里

3.1、apk文件的生成流程

adt时代的生成流程:

aapt打包程序资源,处理AndroidManifest.xml和xml布局文件生成R.java文件。然后使用aidl解析AIDL接口定义,并生成相应的java文件。然后调用java编译器生成class文件。再使用dx将所有class文件与jar包打包生成dex文件。调用apkbuilder将上面的资源和dex文件合并成apk。最后对apk进行签名和对齐处理。

ec574038051854668fcd6161a941bcbb.png

android studio时代的生成流程:

使用gradle作为构建工具。

image-20210628213708543

可以通过系统程序安装apk(开机时安装)

开机启动时由PackageManagerService服务完成。会安装/system/app的所有程序

4、classes.dex

包含apk的可执行代码。可以先看下android源码中的dalvik/libdex/DexFile.h的定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Collection of pointers into a DEX file: the optional odex header, the
 * DEX header, and each of the sections described below.
 */
struct DexFile {
/* header of the optimized (odex) file */
const DexOptHeader* pOptHeader;

/* DEX file header: describes the file and records the file offsets of the other data structures */
const DexHeader* pHeader;
/* index (ID) sections */
const DexStringId* pStringIds;
const DexTypeId* pTypeIds;
const DexFieldId* pFieldIds;
const DexMethodId* pMethodIds;
const DexProtoId* pProtoIds;
/* class definitions: where the actual data is stored */
const DexClassDef* pClassDefs;
/* static link data section */
const DexLink* pLinkData;

/*
* These are mapped out of the "auxillary" section, and may not be
* included in the file.
*/
const DexClassLookup* pClassLookup;
const void* pRegisterMapPool; // RegisterMapClassPool

/* points to start of DEX file data */
const u1* baseAddr;

/* track memory overhead for auxillary structures */
int overhead;

/* additional app-specific data structures associated with the DEX */
//void* auxData;
};

然后一些数据类型的定义

1
2
3
4
5
6
7
8
/*
 * Fixed-width integer shorthands used by the DEX structures:
 * "u" = unsigned, "s" = signed, the digit is the size in bytes.
 */
typedef uint8_t  u1;
typedef int8_t   s1;

typedef uint16_t u2;
typedef int16_t  s2;

typedef uint32_t u4;
typedef int32_t  s4;

typedef uint64_t u8;
typedef int64_t  s8;

然后看看dex文件头的结构体

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * DEX file header: identification, integrity fields, and the size and
 * file offset of every section.
 */
struct DexHeader {
u1 magic[8]; /* marks a valid DEX file; normally the fixed bytes 64 65 78 0A 30 33 35 00 ("dex\n035\0") */
u4 checksum; /* adler32 checksum of the DEX file, used to detect corruption or tampering */
u1 signature[kSHA1DigestLen]; /* SHA-1 hash, used to identify the DEX file as it was before dexopt optimization */
u4 fileSize; /* length of entire file, DexHeader included */
u4 headerSize; /* offset to start of next section, i.e. the number of bytes taken by DexHeader; normally 0x70 */
u4 endianTag; /* CPU byte order the DEX targets; the preset ENDIAN_CONSTANT is 0x12345678, i.e. little-endian by default */
u4 linkSize; /* size of the link section */
u4 linkOff; /* file offset of the link section */
u4 mapOff; /* file offset of the DexMapList structure */
u4 stringIdsSize; /* from here on: size and file offset of each remaining section */
u4 stringIdsOff;
u4 typeIdsSize;
u4 typeIdsOff;
u4 protoIdsSize;
u4 protoIdsOff;
u4 fieldIdsSize;
u4 fieldIdsOff;
u4 methodIdsSize;
u4 methodIdsOff;
u4 classDefsSize;
u4 classDefsOff;
u4 dataSize;
u4 dataOff;
};

下面是用010查看dex实际例子的详细数据

image-20210628221317029

dalvik虚拟机解析dex文件内容,最终将其映射成DexMapList数据结构,DexHeader中的mapOff字段指明DexMapList结构在dex文件中的偏移量。结构如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * One entry of the map list: the type, size and file offset of one kind
 * of data.
 */
struct DexMapItem {
u2 type; /* one of the kDexType* constants */
u2 unused; /* not used; present for alignment */
u4 size; /* size of the data */
u4 offset; /* file offset of the data of the given type */
};

/*
* Direct-mapped "map_list": a count followed by that many DexMapItem
* entries.
*/
struct DexMapList {
u4 size; /* number of DexMapItem entries */
DexMapItem list[1]; /* entries */
};

/* Type codes for map entries (the kDexType* values held in DexMapItem.type). */
enum {
kDexTypeHeaderItem = 0x0000,
kDexTypeStringIdItem = 0x0001,
kDexTypeTypeIdItem = 0x0002,
kDexTypeProtoIdItem = 0x0003,
kDexTypeFieldIdItem = 0x0004,
kDexTypeMethodIdItem = 0x0005,
kDexTypeClassDefItem = 0x0006,
kDexTypeCallSiteIdItem = 0x0007,
kDexTypeMethodHandleItem = 0x0008,
kDexTypeMapList = 0x1000,
kDexTypeTypeList = 0x1001,
kDexTypeAnnotationSetRefList = 0x1002,
kDexTypeAnnotationSetItem = 0x1003,
kDexTypeClassDataItem = 0x2000,
kDexTypeCodeItem = 0x2001,
kDexTypeStringDataItem = 0x2002,
kDexTypeDebugInfoItem = 0x2003,
kDexTypeAnnotationItem = 0x2004,
kDexTypeEncodedArrayItem = 0x2005,
kDexTypeAnnotationsDirectoryItem = 0x2006,
};

下面是dex真实的数据例子。这些数据和DexHeader中的偏移和大小对应。描述了整个DexHeader结构

image-20210628223619795

stringDataOff指向的字符串并不是普通的ascii字符串,而是MUTF-8编码的。这个是一个经过修改的UTF-8编码。和传统的UTF-8相似。下面是结构体

1
2
3
/* One string_id entry: locates the data of one string. */
struct DexStringId {
u4 stringDataOff; /* offset of the string data */
};

下面是真实dex的数据贴图

image-20210628224254285

DexTypeId结构体如下

1
2
3
/* One type_id entry: names a type through its descriptor string. */
struct DexTypeId {
u4 descriptorIdx; /* index into stringIds list for type descriptor */
};

真实数据如下

image-20210628224518205

然后是DexProtoId方法声明结构体。

1
2
3
4
5
/* One proto_id entry: a method prototype (return type plus parameter list). */
struct DexProtoId {
u4 shortyIdx; /* index into the DexStringId list */
u4 returnTypeIdx; /* index into the DexTypeId list */
u4 parametersOff; /* offset of the DexTypeList holding the parameters */
};

真实数据如下

image-20210628225103521

接着看上面DexTypeList的结构体

1
2
3
4
5
6
7
/*
 * One element of a type list.
 *
 * Declared before DexTypeList because DexTypeList embeds it by value; with
 * the two definitions in the opposite order the snippet does not compile.
 */
struct DexTypeItem {
    u2 typeIdx; /* index into the DexTypeId list */
};

/*
 * Counted list of types, e.g. the parameter list that
 * DexProtoId.parametersOff points to.
 */
struct DexTypeList {
    u4 size;             /* number of DexTypeItem entries */
    DexTypeItem list[1]; /* entries */
};

到这里就知道了。方法声明由返回类型和参数列表组成。

继续看DexFieldId。结构体如下

1
2
3
4
5
/* One field_id entry: identifies a field by owning class, type and name. */
struct DexFieldId {
u2 classIdx; /* class the field belongs to: index into the DexTypeId list */
u2 typeIdx; /* type of the field: index into the DexTypeId list */
u4 nameIdx; /* name of the field: index into the DexStringId list */
};

真实数据如下

image-20210628225926424

接下来是DexMethodId,结构如下

1
2
3
4
5
/* One method_id entry: identifies a method by owning class, prototype and name. */
struct DexMethodId {
u2 classIdx; /* class the method belongs to: index into the DexTypeId list */
u2 protoIdx; /* prototype of the method: index into the DexProtoId list */
u4 nameIdx; /* name of the method: index into the DexStringId list */
};

真实数据如下

image-20210628230238458

最后是DexClassDef,结构如下

1
2
3
4
5
6
7
8
9
10
/* One class_def entry: describes a class and points to its data. */
struct DexClassDef {
u4 classIdx; /* type of the class: index into the DexTypeId list */
u4 accessFlags; /* access flags */
u4 superclassIdx; /* type of the superclass: index into the DexTypeId list */
u4 interfacesOff; /* interfaces: offset of a DexTypeList; 0 if the class declares/implements no interfaces */
u4 sourceFileIdx; /* name of the source file the class came from: index into the DexStringId list */
u4 annotationsOff; /* annotations (class, field, method or parameter annotations depending on kind): offset of a DexAnnotationsDirectoryItem; 0 if there are none */
u4 classDataOff; /* data of the class: offset of the DexClassData structure */
u4 staticValuesOff; /* static data of the class: offset of a DexEncodedArray structure */
};

真实数据如下

image-20210629205721474

接着是DexClassData的结构体如下。这几个不在DexFile.h中定义,而是在DexClass.h中定义的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* expanded form of a class_data_item header: the four member counts */
struct DexClassDataHeader {
u4 staticFieldsSize; /* number of static fields */
u4 instanceFieldsSize; /* number of instance fields */
u4 directMethodsSize; /* number of direct methods */
u4 virtualMethodsSize; /* number of virtual methods */
};

/* expanded form of encoded_field */
struct DexField {
u4 fieldIdx; /* index into the DexFieldId list */
u4 accessFlags; /* access flags */
};

/* expanded form of encoded_method */
struct DexMethod {
u4 methodIdx; /* index into the DexMethodId list */
u4 accessFlags; /* access flags */
u4 codeOff; /* offset of the DexCode structure */
};

/* The fields and methods of one class, grouped by kind. */
struct DexClassData {
DexClassDataHeader header; /* the number of fields and methods of each kind */
DexField* staticFields; /* static fields */
DexField* instanceFields; /* instance fields */
DexMethod* directMethods; /* direct methods */
DexMethod* virtualMethods; /* virtual methods */
};

然后看看DexClassData的真实数据

image-20210629215356769

这里相当于把DexClassDataHeader直接展示了。前面4个字段就是。下面是DexMethod的真实数据展示

image-20210629220004765

继续看这里面的DexCode结构

1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Code of one method: register and argument counts followed by the
 * instruction array and, after it, the try/catch data listed below.
 */
struct DexCode {
u2 registersSize; /* number of registers used */
u2 insSize; /* number of incoming arguments */
u2 outsSize; /* number of registers used when calling other methods */
u2 triesSize; /* number of try/catch items */
u4 debugInfoOff; /* offset of the debug info */
u4 insnsSize; /* size of the instruction array, in 2-byte units */
u2 insns[1]; /* the instructions */
/* 2 bytes of padding for alignment */
/* followed by try_item[triesSize] (DexTry structures) */
/* followed by uleb128 handlersSize */
/* followed by catch_handler_item[handlersSize] (DexCatchHandler structures) */
};

这里的registersSize的值其实就是smali语法里面的.registers指令后面的值。insSize是传入参数占用的寄存器个数,对应smali语法里.param声明的参数(非静态方法还包括this)。outsSize是在方法内部调用其他方法时,传递参数所需要的寄存器个数,取所有调用中的最大值,和被调用方法自身使用多少个寄存器无关。例如a方法使用5个寄存器,a方法内调用了b方法,b方法自身使用了20个寄存器,但调用时只需要传3个寄存器的参数(包括this),则outsSize值是3而不是20。下面看真实数据展示

image-20210629222029571

5、dex文件的验证和优化

了解了验证和优化的过程,就知道了DexHeader中的checksum和signature字段的计算过程。就可以在修改dex文件后对这两个字段修正。

dexopt是android专门用来提供验证和优化操作的。dalvik虚拟机在加载dex文件时,通过指定的验证与优化选项来调用dexopt进行优化操作。

dexopt的主程序入口在OptMain.cpp。不过看了android9.0的代码中没有这个cpp了。4.4.4版本中还有。

处理apk、jar、zip中的classes.dex文件的函数是extractAndProcessZip()。同样在9.0的代码中找不到这个函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/*
 * Extracts classes.dex from an opened zip archive into the cache file,
 * parses the dexopt flag string, prepares the VM and hands the extracted
 * data to dvmContinueOptimization().
 *
 * Excerpt: "..." marks source lines elided from these notes (local
 * declarations, opening of the archive). "result" is set to 0 only after
 * every step has succeeded; every exit goes through "bail", which closes
 * the archive.
 */
static int extractAndProcessZip(int zipFd, int cacheFd,
const char* debugFileName, bool isBootstrap, const char* bootClassPath,
const char* dexoptFlagStr)
{
...
// Look for classes.dex in the archive; give up if it is not there.
zipEntry = dexZipFindEntry(&zippy, kClassesDex);
if (zipEntry == NULL) {
ALOGW("DexOptZ: zip archive '%s' does not include %s",
debugFileName, kClassesDex);
goto bail;
}
/*
* Read the uncompressed length, modification time and CRC-32 of classes.dex.
*/
if (dexZipGetEntryInfo(&zippy, zipEntry, NULL, &uncompLen, NULL, NULL,
&modWhen, &crc32) != 0)
{
ALOGW("DexOptZ: zip archive GetEntryInfo failed on %s", debugFileName);
goto bail;
}

/* NOTE(review): these self-assignments are no-ops as written; confirm against the original source. */
uncompLen = uncompLen;
modWhen = modWhen;
crc32 = crc32;

/*
* Extract classes.dex into the cache file.
*/
if (dexZipExtractEntryToFile(&zippy, zipEntry, cacheFd) != 0) {
ALOGW("DexOptZ: extraction of %s from %s failed",
kClassesDex, debugFileName);
goto bail;
}

/* Parse the optimization and verification options passed in by the caller. */
if (dexoptFlagStr[0] != '\0') {
const char* opc;
const char* val;

opc = strstr(dexoptFlagStr, "v="); /* verification option */
if (opc != NULL) {
switch (*(opc+2)) {
case 'n': verifyMode = VERIFY_MODE_NONE; break;
case 'r': verifyMode = VERIFY_MODE_REMOTE; break;
case 'a': verifyMode = VERIFY_MODE_ALL; break;
default: break;
}
}

opc = strstr(dexoptFlagStr, "o="); /* optimization option */
if (opc != NULL) {
switch (*(opc+2)) {
case 'n': dexOptMode = OPTIMIZE_MODE_NONE; break;
case 'v': dexOptMode = OPTIMIZE_MODE_VERIFIED; break;
case 'a': dexOptMode = OPTIMIZE_MODE_ALL; break;
case 'f': dexOptMode = OPTIMIZE_MODE_FULL; break;
default: break;
}
}

opc = strstr(dexoptFlagStr, "m=y"); /* register map */
if (opc != NULL) {
dexoptFlags |= DEXOPT_GEN_REGISTER_MAPS;
}

opc = strstr(dexoptFlagStr, "u="); /* uniprocessor target */
if (opc != NULL) {
switch (*(opc+2)) {
case 'y': dexoptFlags |= DEXOPT_UNIPROCESSOR; break;
case 'n': dexoptFlags |= DEXOPT_SMP; break;
default: break;
}
}
}

/*
* Start a VM for dexopt using the optimization and verification options;
* the two options end up in the global DvmGlobals as dexOptMode and
* classVerifyMode.
*/
if (dvmPrepForDexOpt(bootClassPath, dexOptMode, verifyMode,
dexoptFlags) != 0)
{
ALOGE("DexOptZ: VM init failed");
goto bail;
}

//vmStarted = 1;

/* Run the actual verification and optimization. */
if (!dvmContinueOptimization(cacheFd, dexOffset, uncompLen, debugFileName,
modWhen, crc32, isBootstrap))
{
ALOGE("Optimization failed");
goto bail;
}

/* we don't shut the VM down -- process is about to exit */

result = 0;

bail:
dexZipCloseArchive(&zippy);
return result;
}

验证和优化的代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/*
 * Verifies and/or optimizes the DEX data that starts at dexOffset inside
 * the file open on fd and is dexLength bytes long.
 *
 * Excerpt: "..." marks source lines elided from these notes (local
 * declarations, the remaining odex output and the "bail" cleanup).
 * Returns false right away when the sizes are implausible.
 */
bool dvmContinueOptimization(int fd, off_t dexOffset, long dexLength,
const char* fileName, u4 modWhen, u4 crc, bool isBootstrap)
{
...
/* Quick sanity check that the file can be valid. */
if (dexLength < (int) sizeof(DexHeader)) {
ALOGE("too small to be DEX");
return false;
}
if (dexOffset < (int) sizeof(DexOptHeader)) {
ALOGE("not enough room for opt header");
return false;
}

bool result = false;

/*
* Drop this into a global so we don't have to pass it around. We could
* also add a field to DexFile, but since it only pertains to DEX
* creation that probably doesn't make sense.
*/
gDvm.optimizingBootstrapClass = isBootstrap;

{
/* Map the whole file (opt-header room plus DEX data) into memory. */
bool success;
void* mapAddr;
mapAddr = mmap(NULL, dexOffset + dexLength, PROT_READ|PROT_WRITE,
MAP_SHARED, fd, 0);
if (mapAddr == MAP_FAILED) {
ALOGE("unable to mmap DEX cache: %s", strerror(errno));
goto bail;
}
/* Derive doVerify and doOpt from the verification and optimization modes. */
bool doVerify, doOpt;
if (gDvm.classVerifyMode == VERIFY_MODE_NONE) {
doVerify = false;
} else if (gDvm.classVerifyMode == VERIFY_MODE_REMOTE) {
doVerify = !gDvm.optimizingBootstrapClass;
} else /*if (gDvm.classVerifyMode == VERIFY_MODE_ALL)*/ {
doVerify = true;
}

if (gDvm.dexOptMode == OPTIMIZE_MODE_NONE) {
doOpt = false;
} else if (gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED ||
gDvm.dexOptMode == OPTIMIZE_MODE_FULL) {
doOpt = doVerify;
} else /*if (gDvm.dexOptMode == OPTIMIZE_MODE_ALL)*/ {
doOpt = true;
}

/*
* Rewrite the file: byte reordering, structure realignment, class
* verification info and auxiliary data.
*/
success = rewriteDex(((u1*) mapAddr) + dexOffset, dexLength,
doVerify, doOpt, &pClassLookup, NULL);

if (success) {
DvmDex* pDvmDex = NULL;
u1* dexAddr = ((u1*) mapAddr) + dexOffset;
/* Check the rewritten (odex) data by opening it as a DexFile. */
if (dvmDexFileOpenPartial(dexAddr, dexLength, &pDvmDex) != 0) {
ALOGE("Unable to create DexFile");
success = false;
} else {
/*
* If configured to do so, generate register map output
* for all verified classes. The register maps were
* generated during verification, and will now be serialized.
* This fills in the structures of the auxiliary data section.
*/
if (gDvm.generateRegisterMaps) {
pRegMapBuilder = dvmGenerateRegisterMaps(pDvmDex);
if (pRegMapBuilder == NULL) {
ALOGE("Failed generating register maps");
success = false;
}
}

DexHeader* pHeader = (DexHeader*)pDvmDex->pHeader;
/* Rewrite the checksum value of the DEX. */
updateChecksum(dexAddr, dexLength, pHeader);

dvmDexFileFree(pDvmDex);
}
}
...
}
...
}

接着看看重写文件的rewriteDex函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
 * Rewrites the DEX image at addr (len bytes): byte-swaps and checks it,
 * opens it as a DexFile, builds the class lookup table and, when doVerify
 * or doOpt is set, loads every class and verifies/optimizes it.
 *
 * Excerpt: "..." marks the elided tail of the function (including the
 * "bail" label and the return).
 */
static bool rewriteDex(u1* addr, int len, bool doVerify, bool doOpt,
DexClassLookup** ppClassLookup, DvmDex** ppDvmDex)
{
DexClassLookup* pClassLookup = NULL;
u8 prepWhen, loadWhen, verifyOptWhen;
DvmDex* pDvmDex = NULL;
bool result = false;
const char* msgStr = "???";

/* Fix the byte order (and verify) first. */
if (dexSwapAndVerify(addr, len) != 0)
goto bail;

/*
* Create the DexFile structure.
*/
if (dvmDexFileOpenPartial(addr, len, &pDvmDex) != 0) {
ALOGE("Unable to create DexFile");
goto bail;
}

/*
* Create the class lookup table. This will eventually be appended
* to the end of the .odex.
*
* We create a temporary link from the DexFile for the benefit of
* class loading, below.
*/
pClassLookup = dexCreateClassLookup(pDvmDex->pDexFile);
if (pClassLookup == NULL)
goto bail;
pDvmDex->pDexFile->pClassLookup = pClassLookup;

/*
* If we're not going to attempt to verify or optimize the classes,
* there's no value in loading them, so bail out early.
*/
if (!doVerify && !doOpt) {
result = true;
goto bail;
}

prepWhen = dvmGetRelativeTimeUsec();

/*
* Load all classes found in this DEX file. If they fail to load for
* some reason, they won't get verified (which is as it should be).
*/
if (!loadAllClasses(pDvmDex))
goto bail;
loadWhen = dvmGetRelativeTimeUsec();

/*
* Create a data structure for use by the bytecode optimizer.
* We need to look up methods in a few classes, so this may cause
* a bit of class loading. We usually do this during VM init, but
* for dexopt on core.jar the order of operations gets a bit tricky,
* so we defer it to here.
*/
if (!dvmCreateInlineSubsTable())
goto bail;

/*
* Verify and optimize all classes in the DEX file (command-line
* options permitting).
*
* This is best-effort, so there's really no way for dexopt to
* fail at this point.
* This is the function that does the real verification work.
*/
verifyAndOptimizeClasses(pDvmDex->pDexFile, doVerify, doOpt);
verifyOptWhen = dvmGetRelativeTimeUsec();
...
}

先继续看看创建DexFile结构的函数dvmDexFileOpenPartial实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Parses the DEX data at addr (len bytes) and wraps it in a DvmDex, which
 * is returned through *ppDvmDex.
 *
 * Returns "result", which is set to 0 once the DvmDex has been created.
 * Excerpt: "..." marks the elided local declarations.
 */
int dvmDexFileOpenPartial(const void* addr, int len, DvmDex** ppDvmDex)
{
...
/* Parse the DEX file. */
pDexFile = dexFileParse((u1*)addr, len, parseFlags);
if (pDexFile == NULL) {
ALOGE("DEX parse failed");
goto bail;
}
/* Set up the fields related to the DexFile's auxiliary data. */
pDvmDex = allocateAuxStructures(pDexFile);
if (pDvmDex == NULL) {
dexFileFree(pDexFile);
goto bail;
}

pDvmDex->isMappedReadOnly = false;
*ppDvmDex = pDvmDex;
result = 0;

bail:
return result;
}

再看看解析dex文件的函数dexFileParse

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
 * Parses DEX data into a DexFile.
 *
 * The part shown covers the integrity checks: the adler32 checksum of the
 * DEX (and of the odex data when an opt header is present) when
 * kDexParseVerifyChecksum is set, and the SHA-1 signature when
 * kVerifySignature is enabled. A mismatch jumps to "bail" unless
 * kDexParseContinueOnError is set.
 *
 * Excerpt: "..." marks source lines elided from these notes.
 */
DexFile* dexFileParse(const u1* data, size_t length, int flags)
{
...
if (flags & kDexParseVerifyChecksum) {
/* Verify the checksum field of the DEX file. */
u4 adler = dexComputeChecksum(pHeader);
if (adler != pHeader->checksum) {
ALOGE("ERROR: bad checksum (%08x vs %08x)",
adler, pHeader->checksum);
if (!(flags & kDexParseContinueOnError))
goto bail;
} else {
ALOGV("+++ adler32 checksum (%08x) verified", adler);
}

const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
if (pOptHeader != NULL) {
/* Verify the checksum of the odex file. */
adler = dexComputeOptChecksum(pOptHeader);
if (adler != pOptHeader->checksum) {
ALOGE("ERROR: bad opt checksum (%08x vs %08x)",
adler, pOptHeader->checksum);
if (!(flags & kDexParseContinueOnError))
goto bail;
} else {
ALOGV("+++ adler32 opt checksum (%08x) verified", adler);
}
}
}

/*
* Verify the SHA-1 digest. (Normally we don't want to do this --
* the digest is used to uniquely identify the original DEX file, and
* can't be computed for verification after the DEX is byte-swapped
* and optimized.)
*/
if (kVerifySignature) {
unsigned char sha1Digest[kSHA1DigestLen];
const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
kSHA1DigestLen;
// Verify the signature: hash everything after magic, checksum and signature.
dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
char tmpBuf1[kSHA1DigestOutputLen];
char tmpBuf2[kSHA1DigestOutputLen];
ALOGE("ERROR: bad SHA1 digest (%s vs %s)",
dexSHA1DigestToStr(sha1Digest, tmpBuf1),
dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
if (!(flags & kDexParseContinueOnError))
goto bail;
} else {
ALOGV("+++ sha1 digest verified");
}
}
...
}

然后看看是怎么验证checksum的,看函数dexComputeChecksum

1
2
3
4
5
6
7
8
9
/*
 * Computes the adler32 checksum of a DEX file.
 *
 * The sum covers everything after the "magic" and "checksum" header
 * fields, up to the end of the file as given by pHeader->fileSize.
 */
u4 dexComputeChecksum(const DexHeader* pHeader)
{
    /* bytes at the front of the header that are left out of the sum */
    const int skipped = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
    const u1* payload = (const u1*) pHeader + skipped;

    /* the initial adler32 value */
    uLong seed = adler32(0L, Z_NULL, 0);

    return (u4) adler32(seed, payload, pHeader->fileSize - skipped);
}

这里看到最终是调用adler32来进行计算。计算的逻辑是:start是header的起始地址,计算的起点是header的地址+magic的大小+checksum的大小,也就是直接跳过这两个字段,指向第三个字段(signature)的位置。数据长度则是整个文件的大小减去这两个字段的大小。

另外再看odex的checksum校验函数dexComputeOptChecksum

1
2
3
4
5
6
7
8
9
10
/*
 * Computes the adler32 checksum of the optimized (odex) data.
 *
 * The sum runs from depsOffset to optOffset + optLength, both measured
 * from the start of the opt header.
 */
u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader)
{
    const u1* base = (const u1*) pOptHeader;
    const u1* first = base + pOptHeader->depsOffset;
    const u1* limit = base + pOptHeader->optOffset + pOptHeader->optLength;

    /* the initial adler32 value */
    uLong seed = adler32(0L, Z_NULL, 0);

    return (u4) adler32(seed, first, limit - first);
}

逻辑和上面的基本一致,只是取值范围不一样。范围是depsOffset到optOffset+optLength的数据进行adler32。

然后继续看signature的验证函数。发现直接就是sha1计算。然后传入的参数是dex跳过magic、checksum、signature字段。sha1的结果再和signature对比,不相同就是验证失败

1
2
3
4
5
6
7
8
static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
unsigned char digest[])
{
SHA1_CTX context;
SHA1Init(&context);
SHA1Update(&context, data, length);
SHA1Final(digest, &context);
}

到这里再回头去看rewriteDex里面调用的verifyAndOptimizeClasses这个函数,这个是真正执行验证和优化的函数,里面调用了verifyAndOptimizeClass处理单个类的优化和验证

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Walks every class definition in the DEX file and passes each class that
 * can be looked up to verifyAndOptimizeClass(). Classes that are not
 * available are skipped.
 */
static void verifyAndOptimizeClasses(DexFile* pDexFile, bool doVerify,
    bool doOpt)
{
    const u4 classCount = pDexFile->pHeader->classDefsSize;

    for (u4 defIdx = 0; defIdx < classCount; defIdx++) {
        const DexClassDef* def = dexGetClassDef(pDexFile, defIdx);
        const char* descriptor = dexStringByTypeIdx(pDexFile, def->classIdx);

        /* all classes are loaded into the bootstrap class loader */
        ClassObject* clazz = dvmLookupClass(descriptor, NULL, false);
        if (clazz == NULL) {
            // TODO: log when in verbose mode
            ALOGV("DexOpt: not optimizing unavailable class '%s'",
                descriptor);
            continue;
        }

        /* verify and optimize this one class */
        verifyAndOptimizeClass(pDexFile, clazz, def, doVerify, doOpt);
    }
}

继续看verifyAndOptimizeClass单个类的处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/*
 * Verifies and/or optimizes a single class and records the outcome in the
 * accessFlags of its DexClassDef (CLASS_ISPREVERIFIED / CLASS_ISOPTIMIZED).
 *
 * Excerpt: "..." marks the elided local declarations ("classDescriptor",
 * "verified").
 */
static void verifyAndOptimizeClass(DexFile* pDexFile, ClassObject* clazz,
const DexClassDef* pClassDef, bool doVerify, bool doOpt)
{
...
classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);

/*
* Verification.
*/
if (doVerify) {
if (dvmVerifyClass(clazz)) {
/*
* Set the "is preverified" flag in the DexClassDef. We
* do it here, rather than in the ClassObject structure,
* because the DexClassDef is part of the odex file.
*/
assert((clazz->accessFlags & JAVA_FLAGS_MASK) ==
pClassDef->accessFlags);
((DexClassDef*)pClassDef)->accessFlags |= CLASS_ISPREVERIFIED;
verified = true;
} else {
// TODO: log when in verbose mode
ALOGV("DexOpt: '%s' failed verification", classDescriptor);
}
}
/* Optimization. */
if (doOpt) {
/* in these two modes only verified classes are optimized */
bool needVerify = (gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED ||
gDvm.dexOptMode == OPTIMIZE_MODE_FULL);
if (!verified && needVerify) {
ALOGV("DexOpt: not optimizing '%s': not verified",
classDescriptor);
} else {
dvmOptimizeClass(clazz, false);

/* set the flag whether or not we actually changed anything */
((DexClassDef*)pClassDef)->accessFlags |= CLASS_ISOPTIMIZED;
}
}
}

继续看处理验证的函数dvmVerifyClass

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Verifies every method of a class.
 *
 * Returns true when the class was already verified or when all of its
 * direct and virtual methods pass verifyMethod(); returns false as soon as
 * one method is rejected.
 */
bool dvmVerifyClass(ClassObject* clazz)
{
    if (dvmIsClassVerified(clazz)) {
        ALOGD("Ignoring duplicate verify attempt on %s", clazz->descriptor);
        return true;
    }

    /* direct methods first */
    for (int d = 0; d < clazz->directMethodCount; d++) {
        if (verifyMethod(&clazz->directMethods[d]))
            continue;
        LOG_VFY("Verifier rejected class %s", clazz->descriptor);
        return false;
    }

    /* then virtual methods */
    for (int v = 0; v < clazz->virtualMethodCount; v++) {
        if (verifyMethod(&clazz->virtualMethods[v]))
            continue;
        LOG_VFY("Verifier rejected class %s", clazz->descriptor);
        return false;
    }

    return true;
}

继续看验证的函数verifyMethod

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
 * Verifies a single method.
 *
 * Runs, in order: a check for methods without code, a register-count
 * sanity check, instruction width computation, try/catch flagging, static
 * instruction verification and code-flow analysis. Returns true when all
 * of them pass (or when a native/abstract method has no code), false
 * otherwise. Every exit goes through "bail", which frees what was
 * allocated in vdata.
 */
static bool verifyMethod(Method* meth)
{
bool result = false;

/*
* Verifier state blob. Various values will be cached here so we
* can avoid expensive lookups and pass fewer arguments around.
*/
VerifierData vdata;
#if 1 // ndef NDEBUG
memset(&vdata, 0x99, sizeof(vdata));
#endif

vdata.method = meth;
vdata.insnsSize = dvmGetMethodInsnsSize(meth);
vdata.insnRegCount = meth->registersSize;
/* NULL these so the cleanup at "bail" is safe on an early exit */
vdata.insnFlags = NULL;
vdata.uninitMap = NULL;
vdata.basicBlocks = NULL;

/*
* If there aren't any instructions, make sure that's expected, then
* exit successfully. Note: for native methods, meth->insns gets set
* to a native function pointer on first call, so don't use that as
* an indicator.
*/
if (vdata.insnsSize == 0) {
if (!dvmIsNativeMethod(meth) && !dvmIsAbstractMethod(meth)) {
LOG_VFY_METH(meth,
"VFY: zero-length code in concrete non-native method");
goto bail;
}

goto success;
}

/*
* Sanity-check the register counts. ins + locals = registers, so make
* sure that ins <= registers.
*/
if (meth->insSize > meth->registersSize) {
LOG_VFY_METH(meth, "VFY: bad register counts (ins=%d regs=%d)",
meth->insSize, meth->registersSize);
goto bail;
}

/*
* Allocate and populate an array to hold instruction data.
*
* TODO: Consider keeping a reusable pre-allocated array sitting
* around for smaller methods.
*/
vdata.insnFlags = (InsnFlags*) calloc(vdata.insnsSize, sizeof(InsnFlags));
if (vdata.insnFlags == NULL)
goto bail;

/*
* Compute the width of each instruction and store the result in insnFlags.
* Count up the #of occurrences of certain opcodes while we're at it.
*/
if (!computeWidthsAndCountOps(&vdata))
goto bail;

/*
* Allocate a map to hold the classes of uninitialized instances.
*/
vdata.uninitMap = dvmCreateUninitInstanceMap(meth, vdata.insnFlags,
vdata.newInstanceCount);
if (vdata.uninitMap == NULL)
goto bail;

/*
* Set the "in try" flags for all instructions guarded by a "try" block.
* Also sets the "branch target" flag on exception handlers.
*/
if (!scanTryCatchBlocks(meth, vdata.insnFlags))
goto bail;

/*
* Perform static instruction verification. Also sets the "branch
* target" flags.
* Checks the number of instructions in the method and their correctness.
*/
if (!verifyInstructions(&vdata))
goto bail;

/*
* Do code-flow analysis.
*
* We could probably skip this for a method with no registers, but
* that's so rare that there's little point in checking.
* Checks that the code flow is correct.
*/
if (!dvmVerifyCodeFlow(&vdata)) {
//ALOGD("+++ %s failed code flow", meth->name);
goto bail;
}

success:
result = true;

bail:
dvmFreeVfyBasicBlocks(&vdata);
dvmFreeUninitInstanceMap(vdata.uninitMap);
free(vdata.insnFlags);
return result;
}

看完验证的流程再看看优化的处理函数dvmOptimizeClass

1
2
3
4
5
6
7
8
9
10
11
/*
 * Optimizes every method of a class: each direct method, then each
 * virtual method, is passed to optimizeMethod() together with
 * essentialOnly.
 */
void dvmOptimizeClass(ClassObject* clazz, bool essentialOnly)
{
    for (int d = 0; d < clazz->directMethodCount; d++)
        optimizeMethod(&clazz->directMethods[d], essentialOnly);

    for (int v = 0; v < clazz->virtualMethodCount; v++)
        optimizeMethod(&clazz->virtualMethods[v], essentialOnly);
}

使用optimizeMethod函数优化所有直接函数和虚函数。代码实现如下。主要是进行指令的替换。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
/*
 * Optimizes a single method by walking its instruction stream and handing
 * each instruction that has a replacement to the matching rewrite* helper.
 *
 * Native and abstract methods are skipped. When essentialOnly is true the
 * "-quick" field substitutions and the second (invoke) switch are not
 * applied.
 */
static void optimizeMethod(Method* method, bool essentialOnly)
{
bool needRetBar, forSmp;
u4 insnsSize;
u2* insns;

if (dvmIsNativeMethod(method) || dvmIsAbstractMethod(method))
return;

forSmp = gDvm.dexOptForSmp;
needRetBar = needsReturnBarrier(method);

insns = (u2*) method->insns;
assert(insns != NULL);
insnsSize = dvmGetMethodInsnsSize(method);

/* one iteration per instruction; insns/insnsSize advance by its width */
while (insnsSize > 0) {
Opcode opc, quickOpc, volatileOpc;
size_t width;
bool matched = true;

opc = dexOpcodeFromCodeUnit(*insns);
width = dexGetWidthFromInstruction(insns);
/* OP_NOP here means "no volatile replacement requested" */
volatileOpc = OP_NOP;

/*
* Each instruction may have:
* - "volatile" replacement
* - may be essential or essential-on-SMP
* - correctness replacement
* - may be essential or essential-on-SMP
* - performance replacement
* - always non-essential
*
* Replacements are considered in the order shown, and the first
* match is applied. For example, iget-wide will convert to
* iget-wide-volatile rather than iget-wide-quick if the target
* field is volatile.
*/

/*
* essential substitutions:
* {iget,iput,sget,sput}-wide --> {op}-wide-volatile
* invoke-direct[/range] --> invoke-object-init/range
*
* essential-on-SMP substitutions:
* {iget,iput,sget,sput}-* --> {op}-volatile
* return-void --> return-void-barrier
*
* non-essential substitutions:
* {iget,iput}-* --> {op}-quick
*
* TODO: might be time to merge this with the other two switches
*/
switch (opc) {
case OP_IGET:
case OP_IGET_BOOLEAN:
case OP_IGET_BYTE:
case OP_IGET_CHAR:
case OP_IGET_SHORT:
quickOpc = OP_IGET_QUICK;
if (forSmp)
volatileOpc = OP_IGET_VOLATILE;
goto rewrite_inst_field;
case OP_IGET_WIDE:
quickOpc = OP_IGET_WIDE_QUICK;
volatileOpc = OP_IGET_WIDE_VOLATILE;
goto rewrite_inst_field;
case OP_IGET_OBJECT:
quickOpc = OP_IGET_OBJECT_QUICK;
if (forSmp)
volatileOpc = OP_IGET_OBJECT_VOLATILE;
goto rewrite_inst_field;
case OP_IPUT:
case OP_IPUT_BOOLEAN:
case OP_IPUT_BYTE:
case OP_IPUT_CHAR:
case OP_IPUT_SHORT:
quickOpc = OP_IPUT_QUICK;
if (forSmp)
volatileOpc = OP_IPUT_VOLATILE;
goto rewrite_inst_field;
case OP_IPUT_WIDE:
quickOpc = OP_IPUT_WIDE_QUICK;
volatileOpc = OP_IPUT_WIDE_VOLATILE;
goto rewrite_inst_field;
case OP_IPUT_OBJECT:
quickOpc = OP_IPUT_OBJECT_QUICK;
if (forSmp)
volatileOpc = OP_IPUT_OBJECT_VOLATILE;
/* fall through */
rewrite_inst_field:
/* shared tail for every instance-field case above */
if (essentialOnly)
quickOpc = OP_NOP; /* if essential-only, no "-quick" sub */
if (quickOpc != OP_NOP || volatileOpc != OP_NOP)
rewriteInstField(method, insns, quickOpc, volatileOpc);
break;

case OP_SGET:
case OP_SGET_BOOLEAN:
case OP_SGET_BYTE:
case OP_SGET_CHAR:
case OP_SGET_SHORT:
if (forSmp)
volatileOpc = OP_SGET_VOLATILE;
goto rewrite_static_field;
case OP_SGET_WIDE:
volatileOpc = OP_SGET_WIDE_VOLATILE;
goto rewrite_static_field;
case OP_SGET_OBJECT:
if (forSmp)
volatileOpc = OP_SGET_OBJECT_VOLATILE;
goto rewrite_static_field;
case OP_SPUT:
case OP_SPUT_BOOLEAN:
case OP_SPUT_BYTE:
case OP_SPUT_CHAR:
case OP_SPUT_SHORT:
if (forSmp)
volatileOpc = OP_SPUT_VOLATILE;
goto rewrite_static_field;
case OP_SPUT_WIDE:
volatileOpc = OP_SPUT_WIDE_VOLATILE;
goto rewrite_static_field;
case OP_SPUT_OBJECT:
if (forSmp)
volatileOpc = OP_SPUT_OBJECT_VOLATILE;
/* fall through */
rewrite_static_field:
/* shared tail for every static-field case above */
if (volatileOpc != OP_NOP)
rewriteStaticField(method, insns, volatileOpc);
break;

case OP_INVOKE_DIRECT:
case OP_INVOKE_DIRECT_RANGE:
if (!rewriteInvokeObjectInit(method, insns)) {
/* may want to try execute-inline, below */
matched = false;
}
break;
case OP_RETURN_VOID:
if (needRetBar)
rewriteReturnVoid(method, insns);
break;
default:
matched = false;
break;
}


/*
* non-essential substitutions:
* invoke-{virtual,direct,static}[/range] --> execute-inline
* invoke-{virtual,super}[/range] --> invoke-*-quick
*/
if (!matched && !essentialOnly) {
switch (opc) {
case OP_INVOKE_VIRTUAL:
if (!rewriteExecuteInline(method, insns, METHOD_VIRTUAL)) {
rewriteVirtualInvoke(method, insns,
OP_INVOKE_VIRTUAL_QUICK);
}
break;
case OP_INVOKE_VIRTUAL_RANGE:
if (!rewriteExecuteInlineRange(method, insns, METHOD_VIRTUAL)) {
rewriteVirtualInvoke(method, insns,
OP_INVOKE_VIRTUAL_QUICK_RANGE);
}
break;
case OP_INVOKE_SUPER:
rewriteVirtualInvoke(method, insns, OP_INVOKE_SUPER_QUICK);
break;
case OP_INVOKE_SUPER_RANGE:
rewriteVirtualInvoke(method, insns, OP_INVOKE_SUPER_QUICK_RANGE);
break;
case OP_INVOKE_DIRECT:
rewriteExecuteInline(method, insns, METHOD_DIRECT);
break;
case OP_INVOKE_DIRECT_RANGE:
rewriteExecuteInlineRange(method, insns, METHOD_DIRECT);
break;
case OP_INVOKE_STATIC:
rewriteExecuteInline(method, insns, METHOD_STATIC);
break;
case OP_INVOKE_STATIC_RANGE:
rewriteExecuteInlineRange(method, insns, METHOD_STATIC);
break;
default:
/* nothing to do for this instruction */
;
}
}

/* the width read before the rewrite must still hold after it */
assert(width > 0);
assert(width <= insnsSize);
assert(width == dexGetWidthFromInstruction(insns));

insns += width;
insnsSize -= width;
}

assert(insnsSize == 0);
}

了解完整个dex的验证流程后,我们就可以修改dex后,自己制作工具修复dex的checksum和signature了。当然也有现成的工具帮我们修复这个dex的验证的。d2j-dex-recompute-checksum

./d2j-dex-recompute-checksum.sh -f ./Hello.dex

然后生成出Hello-rechecksum.dex文件。替换原来的dex即可。