diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index f4aa815b690ac17505a1b59f39770e926b2e7877..461df93e825edf81e98d22683cfbf4fe7b4404aa 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -15,6 +15,10 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#ifdef CONFIG_MSM_OCMEM
+#  include <mach/ocmem.h>
+#endif
+
 #include "a3xx_gpu.h"
 
 #define A3XX_INT0_MASK \
@@ -63,6 +67,7 @@ static void a3xx_me_init(struct msm_gpu *gpu)
 static int a3xx_hw_init(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
 	uint32_t *ptr, len;
 	int i, ret;
 
@@ -105,6 +110,21 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
 
+	} else if (adreno_is_a330v2(adreno_gpu)) {
+		/*
+		 * Most of the VBIF registers on 8974v2 have the correct
+		 * values at power on, so we won't modify those if we don't
+		 * need to
+		 */
+		/* Enable 1k sort: */
+		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
+		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
+		/* Enable WR-REQ: */
+		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
+		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
+		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
+		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
+
 	} else if (adreno_is_a330(adreno_gpu)) {
 		/* Set up 16 deep read/write request queues: */
 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
@@ -121,10 +141,10 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
 		/* Set up AOOO: */
-		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000ffff);
-		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0xffffffff);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
 		/* Enable 1K sort: */
-		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001ffff);
+		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
 		/* Disable VBIF clock gating. This is to enable AXI running
 		 * higher frequency than GPU:
@@ -162,14 +182,23 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
 
 	/* Enable Clock gating: */
-	gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
-
-	/* Set the OCMEM base address for A330 */
-//TODO:
-//	if (adreno_is_a330(adreno_gpu)) {
-//		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
-//			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
-//	}
+	if (adreno_is_a320(adreno_gpu))
+		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
+	else if (adreno_is_a330v2(adreno_gpu))
+		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
+	else if (adreno_is_a330(adreno_gpu))
+		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
+
+	if (adreno_is_a330v2(adreno_gpu))
+		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
+	else if (adreno_is_a330(adreno_gpu))
+		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
+
+	/* Set the OCMEM base address for A330, etc */
+	if (a3xx_gpu->ocmem_hdl) {
+		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
+			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
+	}
 
 	/* Turn on performance counters: */
 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
@@ -238,12 +267,19 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
 
 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
-	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu))
+	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
-
+	} else if (adreno_is_a330(adreno_gpu)) {
+		/* NOTE: this (value taken from downstream android driver)
+		 * includes some bits outside of the known bitfields.  But
+		 * A330 has this "MERCIU queue" thing too, which might
+		 * explain a new bitfield or reshuffling:
+		 */
+		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
+	}
 
 	/* clear ME_HALT to start micro engine */
 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
@@ -253,6 +289,14 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
 	return 0;
 }
 
+static void a3xx_recover(struct msm_gpu *gpu)
+{
+	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1); /* write 1 to the SW reset command register */
+	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD); /* read back; presumably flushes the posted write - TODO confirm */
+	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0); /* then clear it again */
+	adreno_recover(gpu); /* finish with the common adreno recovery path */
+}
+
 static void a3xx_destroy(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -261,6 +305,12 @@ static void a3xx_destroy(struct msm_gpu *gpu)
 	DBG("%s", gpu->name);
 
 	adreno_gpu_cleanup(adreno_gpu);
+
+#ifdef CONFIG_MSM_OCMEM
+	if (a3xx_gpu->ocmem_base)
+		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
+#endif
+
 	put_device(&a3xx_gpu->pdev->dev);
 	kfree(a3xx_gpu);
 }
@@ -371,7 +421,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.hw_init = a3xx_hw_init,
 		.pm_suspend = msm_gpu_pm_suspend,
 		.pm_resume = msm_gpu_pm_resume,
-		.recover = adreno_recover,
+		.recover = a3xx_recover,
 		.last_fence = adreno_last_fence,
 		.submit = adreno_submit,
 		.flush = adreno_flush,
@@ -387,6 +437,7 @@ static const struct adreno_gpu_funcs funcs = {
 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 {
 	struct a3xx_gpu *a3xx_gpu = NULL;
+	struct adreno_gpu *adreno_gpu;
 	struct msm_gpu *gpu;
 	struct platform_device *pdev = a3xx_pdev;
 	struct adreno_platform_config *config;
@@ -406,7 +457,8 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	gpu = &a3xx_gpu->base.base;
+	adreno_gpu = &a3xx_gpu->base;
+	gpu = &adreno_gpu->base;
 
 	get_device(&pdev->dev);
 	a3xx_gpu->pdev = pdev;
@@ -421,11 +473,25 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
 			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
 
-	ret = adreno_gpu_init(dev, pdev, &a3xx_gpu->base,
-			&funcs, config->rev);
+	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
 	if (ret)
 		goto fail;
 
+	/* if needed, allocate gmem: */
+	if (adreno_is_a330(adreno_gpu)) {
+#ifdef CONFIG_MSM_OCMEM
+		/* TODO this is different/missing upstream: */
+		struct ocmem_buf *ocmem_hdl =
+				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
+
+		a3xx_gpu->ocmem_hdl = ocmem_hdl;
+		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
+		adreno_gpu->gmem = ocmem_hdl->len;
+		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
+				a3xx_gpu->ocmem_base);
+#endif
+	}
+
 	if (!gpu->mmu) {
 		/* TODO we think it is possible to configure the GPU to
 		 * restrict access to VRAM carveout.  But the required
@@ -460,7 +526,42 @@ static int a3xx_probe(struct platform_device *pdev)
 {
 	static struct adreno_platform_config config = {};
 #ifdef CONFIG_OF
-	/* TODO */
+	struct device_node *child, *node = pdev->dev.of_node;
+	u32 val;
+	int ret;
+
+	ret = of_property_read_u32(node, "qcom,chipid", &val);
+	if (ret) {
+		dev_err(&pdev->dev, "could not find chipid: %d\n", ret);
+		return ret;
+	}
+
+	config.rev = ADRENO_REV((val >> 24) & 0xff,
+			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+
+	/* find clock rates: */
+	config.fast_rate = 0;
+	config.slow_rate = ~0;
+	for_each_child_of_node(node, child) {
+		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
+			struct device_node *pwrlvl;
+			for_each_child_of_node(child, pwrlvl) {
+				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
+				if (ret) {
+					dev_err(&pdev->dev, "could not find gpu-freq: %d\n", ret);
+					return ret;
+				}
+				config.fast_rate = max(config.fast_rate, val);
+				config.slow_rate = min(config.slow_rate, val);
+			}
+		}
+	}
+
+	if (!config.fast_rate) {
+		dev_err(&pdev->dev, "could not find clk rates\n");
+		return -ENXIO;
+	}
+
 #else
 	struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
 	uint32_t version = socinfo_get_version();
@@ -519,10 +620,19 @@ static int a3xx_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id dt_match[] = { /* device-tree match table used by a3xx_driver */
+	{ .compatible = "qcom,kgsl-3d0" },
+	{} /* empty entry terminates the table */
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
 static struct platform_driver a3xx_driver = {
 	.probe = a3xx_probe,
 	.remove = a3xx_remove,
-	.driver.name = "kgsl-3d0",
+	.driver = {
+		.name = "kgsl-3d0",
+		.of_match_table = dt_match, /* match device-tree nodes listed in dt_match[] */
+	},
 };
 
 void __init a3xx_register(void)
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
index 32c398c2d00a7103a64a0e76f248b0ce4a712db0..bb9a8ca0507b3cdf4a16be9d59383459d6ff4c13 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
@@ -24,6 +24,10 @@
 struct a3xx_gpu {
 	struct adreno_gpu base;
 	struct platform_device *pdev;
+
+	/* if OCMEM is used for GMEM: */
+	uint32_t ocmem_base; /* ocmem_hdl->addr; written (>> 14) to RB_GMEM_BASE_ADDR in a3xx_hw_init() */
+	void *ocmem_hdl; /* buffer from ocmem_allocate(); passed to ocmem_free() in a3xx_destroy() */
 };
 #define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base)
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 3f1c7b27e33e074688396d43b8d3076d6e4e8a9e..d321099abdd45ac17b37bc32ae8399ce95bc6f32 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -45,7 +45,7 @@ static const struct adreno_info gpulist[] = {
 		.pfpfw = "a300_pfp.fw",
 		.gmem  = SZ_512K,
 	}, {
-		.rev   = ADRENO_REV(3, 3, 0, 0),
+		.rev   = ADRENO_REV(3, 3, 0, ANY_ID),
 		.revn  = 330,
 		.name  = "A330",
 		.pm4fw = "a330_pm4.fw",
@@ -71,7 +71,7 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 		*value = adreno_gpu->info->revn;
 		return 0;
 	case MSM_PARAM_GMEM_SIZE:
-		*value = adreno_gpu->info->gmem;
+		*value = adreno_gpu->gmem;
 		return 0;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
@@ -92,7 +92,7 @@ int adreno_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
 			/* size is log2(quad-words): */
 			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
-			AXXX_CP_RB_CNTL_BLKSZ(RB_BLKSIZE));
+			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));
 
 	/* Setup ringbuffer address: */
 	gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova);
@@ -318,6 +318,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 			rev.core, rev.major, rev.minor, rev.patchid);
 
 	gpu->funcs = funcs;
+	gpu->gmem = gpu->info->gmem;
 	gpu->rev = rev;
 
 	ret = request_firmware(&gpu->pm4, gpu->info->pm4fw, drm->dev);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 451b741fbd1272f8d8ab52007f7826c77ea3459f..ca11ea4da165082fd3ab2f2ecae981d5cd7543ad 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -51,6 +51,7 @@ struct adreno_gpu {
 	struct msm_gpu base;
 	struct adreno_rev rev;
 	const struct adreno_info *info;
+	uint32_t gmem;  /* actual gmem size */
 	uint32_t revn;  /* numeric revision name */
 	const struct adreno_gpu_funcs *funcs;
 
@@ -97,6 +98,11 @@ static inline bool adreno_is_a330(struct adreno_gpu *gpu)
 	return gpu->revn == 330;
 }
 
+static inline bool adreno_is_a330v2(struct adreno_gpu *gpu)
+{
+	return adreno_is_a330(gpu) && (gpu->rev.patchid > 0); /* v2 == an A330 whose patchid is non-zero */
+}
+
 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
 int adreno_hw_init(struct msm_gpu *gpu);
 uint32_t adreno_last_fence(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 014a3fd04f62002d88337b807ac0c0f32ed2d56f..92b7459862314a6e41756fae21d5496329b0be5c 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -40,7 +40,7 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
 	for (i = 0; i < cnt; i++) {
 		struct device *msm_iommu_get_ctx(const char *ctx_name);
 		struct device *ctx = msm_iommu_get_ctx(names[i]);
-		if (!ctx)
+		if (IS_ERR_OR_NULL(ctx))
 			continue;
 		ret = iommu_attach_device(iommu->domain, ctx);
 		if (ret) {