{"id":1477,"date":"2019-08-14T13:39:35","date_gmt":"2019-08-14T11:39:35","guid":{"rendered":"https:\/\/myoceane.fr\/?p=1477"},"modified":"2020-05-04T22:39:09","modified_gmt":"2020-05-04T20:39:09","slug":"pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b","status":"publish","type":"post","link":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/","title":{"rendered":"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b"},"content":{"rendered":"<div id=\"fb-root\"><\/div>\n\n<p style=\"text-align: justify;\">\u5728\u5b89\u88dd\u5b8c Jupyter Notebook \u5230\u81ea\u5df1\u7684 Server \u4e4b\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5728\u9060\u7aef\u57f7\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u7a0b\u5f0f\u4e86\uff01<a href=\"https:\/\/myoceane.fr\/?p=296\">\u53c3\u8003\u9023\u7d50<\/a>\u3002\u5728\u773e\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u4e2d\uff0c\u6700\u76f4\u89ba\u7684\u6f14\u7b97\u6cd5\u5c31\u5c6c\u6c7a\u7b56\u6a39 (Decision Tree) \u4e86\uff0c\u672c\u7bc7\u6559\u5b78\u662f\u4ee5 <a href=\"https:\/\/spark.apache.org\/docs\/2.2.0\/mllib-decision-tree.html\">Spark \u7bc4\u4f8b<\/a>\u4e2d\u5448\u73fe\u7684 Python \u7a0b\u5f0f\u78bc\u70ba\u4e3b\u8981\u793a\u7bc4\u5167\u5bb9\uff0c\u4e26\u4e14\u5229\u7528 Jupyter Notebook \u4f5c\u70ba\u57f7\u884c\u8f09\u5177\u3002<\/p>\n<pre class=\"lang:python\">from pyspark.mllib.tree import DecisionTree, DecisionTreeModel\nfrom pyspark.mllib.util import MLUtils\n\ndata = MLUtils.loadLibSVMFile(sc, 'data\/mllib\/sample_libsvm_data.txt')\n(trainingData, testData) = data.randomSplit([0.7, 0.3])\n\nmodel = DecisionTree.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},                                    \n    impurity='gini', maxDepth=5, maxBins=32)\n\npredictions = model.predict(testData.map(lambda x: x.features))\nlabelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)\ntestErr = labelsAndPredictions.filter(\n    lambda lp: lp[0] != lp[1]).count() \/ 
float(testData.count())\nprint('Test Error = ' + str(testErr))\nprint('Learned classification tree model:')\nprint(model.toDebugString())\n\nmodel.save(sc, \"target\/tmp\/myDecisionTreeClassificationModel\")\nsameModel = DecisionTreeModel.load(sc, \"target\/tmp\/myDecisionTreeClassificationModel\")<\/pre>\n\n\n\n\n\n<p>\u5728\u958b\u59cb\u57f7\u884c\u7a0b\u5f0f\u4e4b\u524d\uff0c\u8981\u5148\u78ba\u8a8d\u9060\u7aef\u7684 Jupyter Notebook \u74b0\u5883\u662f\u5426\u5df2\u7d93\u5b89\u88dd pyspark \u51fd\u5f0f\u5eab\uff1f\u5982\u679c\u6c92\u6709\u5b89\u88dd\u6703\u5831\u932f\uff0c\u5426\u5247\u53ef\u4ee5\u5229\u7528\u4ee5\u4e0b\u7684 pip \u6307\u4ee4\u9032\u884c\u5b89\u88dd\u3002<\/p>\n<pre class=\"lang:bash\">pip search pyspark\npip install pyspark==2.3.2\n\nCollecting pyspark==2.3.2\n  Downloading https:\/\/files.pythonhosted.org\/packages\/5e\/cb\/d8ff49ba885e2c88b8cf2967edd84235ffa9ac301bffef657dfa5605a112\/pyspark-2.3.2.tar.gz (211.9MB)\n     |\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 211.9MB 83kB\/s \nRequirement already satisfied: py4j==0.10.7 in \/usr\/local\/lib\/python3.6\/site-packages (from pyspark==2.3.2) (0.10.7)\nBuilding wheels for collected packages: pyspark\n  Building wheel for pyspark (setup.py) ... 
done\n  Created wheel for pyspark: filename=pyspark-2.3.2-py2.py3-none-any.whl size=212344373 sha256=2eaf954ffab67bfdb211ab5cd336661b6b7d05638c7a38f212ec7f4ff82c447b\n  Stored in directory: \/root\/.cache\/pip\/wheels\/be\/7d\/34\/cd3cfbc75d8b6b6ae0658e5425348560b86d187fe3e53832cc\nSuccessfully built pyspark\nInstalling collected packages: pyspark\nSuccessfully installed pyspark-2.3.2<\/pre>\n\n\n\n<p>\u5099\u8a3b\uff1a\u9019\u908a\u9700\u8981\u6ce8\u610f\u4e00\u4e0b\uff0c\u5b89\u88dd\u7684 pyspark \u7248\u672c\u5fc5\u9808\u8981\u662f\u8ddf $SPARK_HOME \u7684 spark \u7248\u672c\u6578\u4e00\u81f4\uff0c\u5426\u5247\u5728\u521d\u59cb\u5316 SparkContext \u8ddf SparkSession \u7684\u6642\u5019\u6703\u4e00\u76f4\u5831\u4ee5\u4e0b\u7684\u932f\u8aa4\u8a0a\u606f\u3002<\/p>\n<pre class=\"lang:bash\">py4j.protocol.Py4JError: org.apache.spark.api.python.PythonUtils.getEncryptionEnabled does not exist in the JVM<\/pre>\n<p>\u767c\u73fe\u4e00\u958b\u59cb\u7684 sc \u4e26\u6c92\u6709\u6b63\u78ba\u88ab\u521d\u59cb\u5316\uff0c\u4e00\u822c\u521d\u59cb\u5316 SparkContext \u7684\u65b9\u6cd5\u6709\u4ee5\u4e0b\u5e7e\u7a2e\uff1a<\/p>\n<ul>\n<li>\u76f4\u63a5\u5f9e pyspark \u88e1\u9762\u8f09\u5165 SparkContext<\/li>\n<\/ul>\n<pre class=\"lang:python\">from pyspark import SparkContext\nsc = SparkContext()<\/pre>\n<ul>\n<li>\u5229\u7528 findspark<\/li>\n<\/ul>\n<pre class=\"lang:python\">import findspark\nfindspark.init('\/opt\/spark-2.3.2-bin-hadoop2.7')\nfindspark.find()\nimport pyspark\nsc = pyspark.SparkContext(appName=\"DecisionTree\")<\/pre>\n\n\n\n<p style=\"text-align: justify;\">\u63a5\u4e0b\u4f86\u5247\u662f\u8f09\u5165\u8cc7\u6599\uff0c\u9019\u908a\u5b98\u7db2\u4e0a\u5efa\u8b70\u53ef\u4ee5\u4f7f\u7528 Libsvm \u63d0\u4f9b\u7684\u53c3\u8003\u6578\u64da\uff0c\u6211\u5011\u62ff\u7b2c\u4e00\u500b\u8cc7\u6599\u4f5c\u70ba\u7bc4\u4f8b <a href=\"https:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvmtools\/datasets\/binary\/a1a\">https:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvmtools\/datasets\/binary\/a1a<\/a> 
\u4e26\u4e14\u5c55\u793a\u7b2c\u4e00\u500b\u8cc7\u6599\u5229\u7528 LabeledPoint \u5448\u73fe\uff1a<\/p>\n<pre class=\"lang:python\">data = MLUtils.loadLibSVMFile(sc, '\/home\/yuting\/.jupyter\/MachineLearning\/a1a')\ndata.first()\n\nLabeledPoint(-1.0, (119,[2,10,13,18,38,41,54,63,66,72,74,75,79,82],[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]))<\/pre>\n\n\n\n<p style=\"text-align: justify;\">\u7531\u65bc Libsvm \u63d0\u4f9b\u7684\u8cc7\u6599\u70ba (1, -1) \u4f5c\u70ba Label \u8207 mllib DecisionTree \u7684\u6a21\u578b\u8a2d\u5b9a\u4e0d\u540c\u6240\u4ee5\u9700\u8981\u5148\u7d93\u904e\u4e00\u500b\u6b63\u898f\u5316\u7684\u904e\u7a0b\u3002\u5f9e\u7d50\u679c\u6211\u5011\u53ef\u4ee5\u770b\u5230 LabeledPoint \u7684 -1 Label \u5df2\u7d93\u88ab\u6539\u6210 0 \u4e86\uff01<\/p>\n<pre class=\"lang:python\">from pyspark.mllib.regression import LabeledPoint\ndef normalize(point):\n    label = point.label\n    if label == 1:\n        return point\n    else: \n        return LabeledPoint(0, point.features)\n    \ndata.count()\ndataNormalized = data.map(lambda s: normalize(s))\ndataNormalized.first()\n\nLabeledPoint(0.0, (119,[2,10,13,18,38,41,54,63,66,72,74,75,79,82],[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]))<\/pre>\n\n\n\n<p>\u5c07\u8cc7\u6599\u5206\u6210 70% Training \u8207 30% Testing\uff0c\u4e26\u4e14\u8a2d\u5b9a DecisionTree \u5206\u8fa8\u5668\u5229\u7528\u4ee5\u4e0b\u7684\u53c3\u6578\uff1a<\/p>\n<ul>\n<li>numClasses=2&nbsp; <br>\u4ee3\u8868\u9019\u662f\u4e00\u500b Classification \u7684\u554f\u984c\uff0c\u9810\u6e2c\u503c\u70ba 0, 1<\/li>\n<li>impurity=&#8217;gini&#8217;<br>\u91dd\u5c0d Classification \u7684\u554f\u984c\u53ef\u4ee5\u9078\u64c7 gini, \u4e5f\u53ef\u4ee5\u9078\u64c7 entropy \u4f5c\u70ba\u8aa4\u5dee\u503c\u7684\u91cf\u6e2c\uff0c\u5982\u679c\u662f Regression \u7684\u554f\u984c\u5247\u53ef\u4ee5\u63a1\u7528 Variance \u53bb\u91cf\u6e2c\u8aa4\u5dee\u3002<br><img loading=\"lazy\" decoding=\"async\" class=\" wp-image-1539 aligncenter\" 
src=\"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/\u87a2\u5e55\u5feb\u7167-2019-08-14-\u4e0b\u53481.29.53-300x69.png\" alt=\"\" width=\"713\" height=\"164\" srcset=\"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/\u87a2\u5e55\u5feb\u7167-2019-08-14-\u4e0b\u53481.29.53-300x69.png 300w, https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/\u87a2\u5e55\u5feb\u7167-2019-08-14-\u4e0b\u53481.29.53-768x176.png 768w, https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/\u87a2\u5e55\u5feb\u7167-2019-08-14-\u4e0b\u53481.29.53-1024x234.png 1024w, https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/\u87a2\u5e55\u5feb\u7167-2019-08-14-\u4e0b\u53481.29.53.png 1774w\" sizes=\"auto, (max-width: 713px) 100vw, 713px\" \/><\/li>\n<li>maxDepth=5<br>\u8d8a\u6df1\u7684\u6c7a\u7b56\u6a39\u53ef\u4ee5\u5f97\u5230\u6bd4\u8f03\u4f4e\u7684 training error \u4f46\u662f\u4e5f\u6703\u6bd4\u8f03\u5bb9\u6613 overfitting\u3002<\/li>\n<li>maxBins=32<\/li>\n<\/ul>\n<pre class=\"lang:python\">(trainingData, testData) = dataNormalized.randomSplit([0.7, 0.3])\nmodel = DecisionTree.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={}, impurity='gini', maxDepth=5, maxBins=32)\n                                     \npredictions = model.predict(testData.map(lambda x: x.features))\nlabelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)\ntestErr = labelsAndPredictions.filter(\n    lambda lp: lp[0] != lp[1]).count() \/ float(testData.count())\nprint('Test Error = ' + str(testErr))\nprint('Learned classification tree model:')\nprint(model.toDebugString())<\/pre>\n\n\n\n<p>\u5f97\u5230\u4ee5\u4e0b\u7d50\u679c\uff0c\u7d50\u679c\u6e05\u695a\u5448\u73fe\u6c7a\u7b56\u6a39\u6c7a\u7b56\u7684\u904e\u7a0b\u8207\u5229\u7528 Testing data \u7522\u751f\u7684\u8aa4\u5dee\u503c\uff01<\/p>\n<pre class=\"lang:python\">Test Error = 0.19807692307692307\nLearned classification tree model:\nDecisionTreeModel classifier of depth 5 with 39 nodes\n  If (feature 39 &lt;= 0.5)\n   
If (feature 31 &lt;= 0.5)\n    If (feature 38 &lt;= 0.5)\n     If (feature 98 &lt;= 0.5)\n      If (feature 73 &lt;= 0.5)\n       Predict: 0.0\n      Else (feature 73 &gt; 0.5)\n       Predict: 0.0\n     Else (feature 98 &gt; 0.5)\n      Predict: 1.0\n    Else (feature 38 &gt; 0.5)\n     If (feature 81 &lt;= 0.5)\n      If (feature 22 &lt;= 0.5)\n       Predict: 0.0\n      Else (feature 22 &gt; 0.5)\n       Predict: 1.0\n     Else (feature 81 &gt; 0.5)\n      If (feature 71 &lt;= 0.5)\n       Predict: 0.0\n      Else (feature 71 &gt; 0.5)\n       Predict: 1.0\n   Else (feature 31 &gt; 0.5)\n    Predict: 1.0\n  Else (feature 39 &gt; 0.5)\n   If (feature 38 &lt;= 0.5)\n    If (feature 34 &lt;= 0.5)\n     If (feature 8 &lt;= 0.5)\n      If (feature 7 &lt;= 0.5)\n       Predict: 0.0\n      Else (feature 7 &gt; 0.5)\n       Predict: 1.0\n     Else (feature 8 &gt; 0.5)\n      If (feature 46 &lt;= 0.5)\n       Predict: 1.0\n      Else (feature 46 &gt; 0.5)\n       Predict: 0.0\n    Else (feature 34 &gt; 0.5)\n     If (feature 90 &lt;= 0.5)\n      If (feature 73 &lt;= 0.5)\n       Predict: 0.0\n      Else (feature 73 &gt; 0.5)\n       Predict: 0.0\n     Else (feature 90 &gt; 0.5)\n      Predict: 1.0\n   Else (feature 38 &gt; 0.5)\n    If (feature 48 &lt;= 0.5)\n     If (feature 1 &lt;= 0.5)\n      If (feature 62 &lt;= 0.5)\n       Predict: 1.0\n      Else (feature 62 &gt; 0.5)\n       Predict: 1.0\n     Else (feature 1 &gt; 0.5)\n      If (feature 75 &lt;= 0.5)\n       Predict: 1.0\n      Else (feature 75 &gt; 0.5)\n       Predict: 0.0\n    Else (feature 48 &gt; 0.5)\n     Predict: 0.0<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u5b89\u88dd\u5b8c Jupyter Notebook \u5230\u81ea\u5df1\u7684 Server 
\u4e4b\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5728\u9060\u7aef\u57f7\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u7a0b\u5f0f\u4e86\uff01\u53c3\u8003\u9023\u7d50\u3002\u5728\u773e\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u4e2d\uff0c\u6700\u76f4\u89ba\u7684\u6f14\u7b97\u6cd5\u5c31\u5c6c\u6c7a\u7b56\u6a39 (Decision Tree) \u4e86\uff0c\u672c\u7bc7\u6559\u5b78\u662f\u4ee5 Spark \u7bc4\u4f8b\u4e2d\u5448\u73fe\u7684 Python \u7a0b\u5f0f\u78bc\u70ba\u4e3b\u8981\u793a\u7bc4\u5167\u5bb9\uff0c\u4e26\u4e14\u5229\u7528 Jupyter Notebook \u4f5c\u70ba\u57f7\u884c\u8f09\u5177\u3002<\/p>\n","protected":false},"author":1,"featured_media":1571,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[9,176],"tags":[],"class_list":["post-1477","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-bigdata-ml","category-python"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v24.6 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b - \u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a M-Y-Oceane<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/myoceane.fr\/index.php\/pyspark-\u6c7a\u7b56\u6a39\u7bc4\u4f8b\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b - \u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a M-Y-Oceane\" \/>\n<meta property=\"og:description\" content=\"\u5728\u5b89\u88dd\u5b8c Jupyter Notebook \u5230\u81ea\u5df1\u7684 Server 
\u4e4b\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5728\u9060\u7aef\u57f7\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u7a0b\u5f0f\u4e86\uff01\u53c3\u8003\u9023\u7d50\u3002\u5728\u773e\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u4e2d\uff0c\u6700\u76f4\u89ba\u7684\u6f14\u7b97\u6cd5\u5c31\u5c6c\u6c7a\u7b56\u6a39 (Decision Tree) \u4e86\uff0c\u672c\u7bc7\u6559\u5b78\u662f\u4ee5 Spark \u7bc4\u4f8b\u4e2d\u5448\u73fe\u7684 Python \u7a0b\u5f0f\u78bc\u70ba\u4e3b\u8981\u793a\u7bc4\u5167\u5bb9\uff0c\u4e26\u4e14\u5229\u7528 Jupyter Notebook \u4f5c\u70ba\u57f7\u884c\u8f09\u5177\u3002\" \/>\n<meta property=\"og:url\" content=\"https:\/\/myoceane.fr\/index.php\/pyspark-\u6c7a\u7b56\u6a39\u7bc4\u4f8b\/\" \/>\n<meta property=\"og:site_name\" content=\"\u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a M-Y-Oceane\" \/>\n<meta property=\"article:published_time\" content=\"2019-08-14T11:39:35+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2020-05-04T20:39:09+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png\" \/>\n\t<meta property=\"og:image:width\" content=\"912\" \/>\n\t<meta property=\"og:image:height\" content=\"438\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"author\" content=\"\u6ab8\u6aac\u7238\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u6ab8\u6aac\u7238\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. 
reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"3 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/\"},\"author\":{\"name\":\"\u6ab8\u6aac\u7238\",\"@id\":\"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b\"},\"headline\":\"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b\",\"datePublished\":\"2019-08-14T11:39:35+00:00\",\"dateModified\":\"2020-05-04T20:39:09+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/\"},\"wordCount\":67,\"commentCount\":0,\"publisher\":{\"@id\":\"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b\"},\"image\":{\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png\",\"articleSection\":[\"Big Data &amp; Machine Learning\",\"Python\"],\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"CommentAction\",\"name\":\"Comment\",\"target\":[\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#respond\"]}]},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/\",\"url\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/\",\"name\":\"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b - \u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a 
M-Y-Oceane\",\"isPartOf\":{\"@id\":\"https:\/\/myoceane.fr\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage\"},\"image\":{\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png\",\"datePublished\":\"2019-08-14T11:39:35+00:00\",\"dateModified\":\"2020-05-04T20:39:09+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage\",\"url\":\"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png\",\"contentUrl\":\"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png\",\"width\":912,\"height\":438},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/myoceane.fr\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/myoceane.fr\/#website\",\"url\":\"https:\/\/myoceane.fr\/\",\"name\":\"M-Y-Oceane \u60f3\u65b9\u6d89\u6cd5\u3002\u91cf\u74f6\u5916\u7684\u5929\u7a7a\",\"description\":\"\u60f3\u65b9\u6d89\u6cd5, France, Taiwan, Health, Information 
Technology\",\"publisher\":{\"@id\":\"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/myoceane.fr\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":[\"Person\",\"Organization\"],\"@id\":\"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b\",\"name\":\"\u6ab8\u6aac\u7238\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/myoceane.fr\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/6cc678684664f8ad45a8d56a6630b183?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/6cc678684664f8ad45a8d56a6630b183?s=96&d=mm&r=g\",\"caption\":\"\u6ab8\u6aac\u7238\"},\"logo\":{\"@id\":\"https:\/\/myoceane.fr\/#\/schema\/person\/image\/\"},\"url\":\"https:\/\/myoceane.fr\/index.php\/author\/johnny5584767gmail-com\/\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b - \u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a M-Y-Oceane","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/myoceane.fr\/index.php\/pyspark-\u6c7a\u7b56\u6a39\u7bc4\u4f8b\/","og_locale":"en_US","og_type":"article","og_title":"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b - \u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a M-Y-Oceane","og_description":"\u5728\u5b89\u88dd\u5b8c Jupyter Notebook \u5230\u81ea\u5df1\u7684 Server \u4e4b\u5f8c\u6211\u5011\u5c31\u53ef\u4ee5\u5728\u9060\u7aef\u57f7\u884c\u6a5f\u5668\u5b78\u7fd2\u7684\u7a0b\u5f0f\u4e86\uff01\u53c3\u8003\u9023\u7d50\u3002\u5728\u773e\u591a\u6a5f\u5668\u5b78\u7fd2\u7684\u6f14\u7b97\u6cd5\u4e2d\uff0c\u6700\u76f4\u89ba\u7684\u6f14\u7b97\u6cd5\u5c31\u5c6c\u6c7a\u7b56\u6a39 (Decision Tree) \u4e86\uff0c\u672c\u7bc7\u6559\u5b78\u662f\u4ee5 Spark \u7bc4\u4f8b\u4e2d\u5448\u73fe\u7684 Python \u7a0b\u5f0f\u78bc\u70ba\u4e3b\u8981\u793a\u7bc4\u5167\u5bb9\uff0c\u4e26\u4e14\u5229\u7528 Jupyter Notebook \u4f5c\u70ba\u57f7\u884c\u8f09\u5177\u3002","og_url":"https:\/\/myoceane.fr\/index.php\/pyspark-\u6c7a\u7b56\u6a39\u7bc4\u4f8b\/","og_site_name":"\u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a M-Y-Oceane","article_published_time":"2019-08-14T11:39:35+00:00","article_modified_time":"2020-05-04T20:39:09+00:00","og_image":[{"width":912,"height":438,"url":"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png","type":"image\/png"}],"author":"\u6ab8\u6aac\u7238","twitter_card":"summary_large_image","twitter_misc":{"Written by":"\u6ab8\u6aac\u7238","Est. 
reading time":"3 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#article","isPartOf":{"@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/"},"author":{"name":"\u6ab8\u6aac\u7238","@id":"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b"},"headline":"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b","datePublished":"2019-08-14T11:39:35+00:00","dateModified":"2020-05-04T20:39:09+00:00","mainEntityOfPage":{"@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/"},"wordCount":67,"commentCount":0,"publisher":{"@id":"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b"},"image":{"@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage"},"thumbnailUrl":"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png","articleSection":["Big Data &amp; Machine Learning","Python"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/","url":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/","name":"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b - \u60f3\u65b9\u6d89\u6cd5 - \u91cf\u74f6\u5916\u7684\u5929\u7a7a 
M-Y-Oceane","isPartOf":{"@id":"https:\/\/myoceane.fr\/#website"},"primaryImageOfPage":{"@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage"},"image":{"@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage"},"thumbnailUrl":"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png","datePublished":"2019-08-14T11:39:35+00:00","dateModified":"2020-05-04T20:39:09+00:00","breadcrumb":{"@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#primaryimage","url":"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png","contentUrl":"https:\/\/myoceane.fr\/wp-content\/uploads\/2019\/08\/DecisionTree.png","width":912,"height":438},{"@type":"BreadcrumbList","@id":"https:\/\/myoceane.fr\/index.php\/pyspark-%e6%b1%ba%e7%ad%96%e6%a8%b9%e7%af%84%e4%be%8b\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/myoceane.fr\/"},{"@type":"ListItem","position":2,"name":"[PySpark] \u6c7a\u7b56\u6a39\u7bc4\u4f8b"}]},{"@type":"WebSite","@id":"https:\/\/myoceane.fr\/#website","url":"https:\/\/myoceane.fr\/","name":"M-Y-Oceane \u60f3\u65b9\u6d89\u6cd5\u3002\u91cf\u74f6\u5916\u7684\u5929\u7a7a","description":"\u60f3\u65b9\u6d89\u6cd5, France, Taiwan, Health, Information 
Technology","publisher":{"@id":"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/myoceane.fr\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":["Person","Organization"],"@id":"https:\/\/myoceane.fr\/#\/schema\/person\/4a4552fb8c27693083d465e12db7658b","name":"\u6ab8\u6aac\u7238","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/myoceane.fr\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/6cc678684664f8ad45a8d56a6630b183?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/6cc678684664f8ad45a8d56a6630b183?s=96&d=mm&r=g","caption":"\u6ab8\u6aac\u7238"},"logo":{"@id":"https:\/\/myoceane.fr\/#\/schema\/person\/image\/"},"url":"https:\/\/myoceane.fr\/index.php\/author\/johnny5584767gmail-com\/"}]}},"amp_enabled":false,"_links":{"self":[{"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/posts\/1477","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/comments?post=1477"}],"version-history":[{"count":79,"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/posts\/1477\/revisions"}],"predecessor-version":[{"id":1572,"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/posts\/1477\/revisions\/1572"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/media\/1571"}],"wp:attachment":[{"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/media?parent=1477"}],"wp:term":[{"taxon
omy":"category","embeddable":true,"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/categories?post=1477"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/myoceane.fr\/index.php\/wp-json\/wp\/v2\/tags?post=1477"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}