{ "cells": [ { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "## Python Version - 3\n", "## Scikit Learn Version - \n", "import pandas as pd\n", "import numpy as np\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "from sklearn.cross_validation import train_test_split\n", "from sklearn.feature_extraction import DictVectorizer\n", "from sklearn.svm import SVC\n", "from sklearn.cluster import KMeans\n", "from sklearn import decomposition\n", "from sklearn import metrics\n", "from sklearn import datasets\n", "#import seaborn as sb\n", "from sklearn.metrics import silhouette_samples, silhouette_score\n", "from sklearn import preprocessing\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.metrics import mean_squared_error\n", "\n", "def encode_onehot(df, cols):\n", " vec = DictVectorizer()\n", " \n", " vec_data = pd.DataFrame(vec.fit_transform(df[cols].to_dict(orient='records')).toarray())\n", " vec_data.columns = vec.get_feature_names()\n", " vec_data.index = df.index\n", " \n", " df = df.drop(cols, axis=1)\n", " df = df.join(vec_data)\n", " return df\n", "\n", "def encode_label(df, cols):\n", " le = preprocessing.LabelEncoder()\n", " \n", " le_data = pd.DataFrame(le.fit_transform(df[cols]))\n", " le_data.columns = cols\n", " le_data.index = df.index\n", " \n", " le_data.to_csv(path_or_buf='C:/ct_gov_condition_label.csv')\n", " \n", " return df\n", "\n", "def getAgeInWeeks(x):\n", " if (pd.isnull(x)):\n", " return 0\n", " elif (x.endswith('Years')):\n", " return 52 * int(x.rstrip('Years '))\n", " elif (x.endswith('Months')):\n", " return 4.5 * int(x.rstrip('Months '))\n", " elif (x.endswith('Weeks')):\n", " return int(x.rstrip('Weeks '))\n", " elif (x.endswith('Days')):\n", " return int(x.rstrip('Days ')) / 7\n", " \n", " return x\n", "\n", "def getSignificance(x):\n", " \n", " x=float(x)\n", " if (pd.isnull(x)):\n", " return 0\n", " elif (x > 0 and x<=.02):\n", " return 1\n", " elif (x > .02 and x<=.03):\n", " return 2\n", " elif (x > .03 and x<=.04):\n", " return 3\n", " elif (x > .04 and x<=.05):\n", " return 4\n", " elif (x > .05 and x<=.1):\n", " return 5\n", " elif (x > .1 and x<=.3):\n", " return 6 \n", " elif (x > .3 and x<=.5):\n", " return 7 \n", " elif (x > .5 and x<=.7):\n", " return 8 \n", " elif (x > .7):\n", " return 9 \n", " \n", " return x\n", "\n", "def getNoDefined(x):\n", " if (pd.isnull(x)):\n", " return 'NotDefined'\n", " \n", " return x\n", " \n", "def getZero(x):\n", " if (pd.isnull(x)):\n", " return 0\n", " \n", " return x\n", " \n", "\n", "\n", "\n", "def getEnrollmentdf(df):\n", " # Lets use the features that are Study Attribute and derived Enrollment. \n", " one_hot_reduced = df[['PHASE','STUDY_TYPE','NUMBER_OF_ARMS','GENDER','CONDITION','INTERVENTION_TYPE','ENDPOINT_CLASSIFICATION','INTERVENTION_MODEL','OBSERVATION_MODEL','MASKING','TIME_PERSPECTIVE','PRIMARY_PURPOSE','ENROLLMENT']] \n", "\n", " # Encoding the text, note that \"Condition\" has been encoded to \n", " one_hot_xform = encode_onehot(one_hot_reduced, [ 'PHASE', 'STUDY_TYPE', 'GENDER','INTERVENTION_TYPE','ENDPOINT_CLASSIFICATION','INTERVENTION_MODEL','OBSERVATION_MODEL','MASKING','TIME_PERSPECTIVE','PRIMARY_PURPOSE'])\n", " one_hot_xform.fillna(0, inplace=True)\n", " \n", " return one_hot_xform\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OVERALL_STATUSCOMPLETION_DATE_TYPEPHASESTUDY_TYPENUMBER_OF_ARMSNUMBER_OF_GROUPSENROLLMENT_TYPEENROLLMENTGENDERMINIMUM_AGE...NUMBER_OF_INVESTIGATORSPARTFLOW_COUNT_STARTEDPARTFLOW_COUNT_NOT_COMPLETEDPARTFLOW_COUNT_COMPLETEDINTERVENTION_MODELOBSERVATION_MODELMASKINGTIME_PERSPECTIVEPRIMARY_PURPOSEENDPOINT_CLASSIFICATION
NCT_ID
NCT00000378CompletedActualPhase 4Interventional2.00.0Actual110Both3120...016364771159Parallel AssignmentNotDefinedDouble BlindNotDefinedTreatmentEfficacy Study
NCT00001656CompletedActualPhase 4Interventional2.00.0Actual25Both364...016364771159Parallel AssignmentNotDefinedDouble BlindNotDefinedTreatmentEfficacy Study
NCT00004859TerminatedActualPhase 3Interventional2.00.0Actual589Both936...016364771159Parallel AssignmentNotDefinedOpen LabelNotDefinedTreatmentEfficacy Study
NCT00001959CompletedActualPhase 2Interventional1.00.0Actual21Both936...016364771159Single Group AssignmentNotDefinedOpen LabelNotDefinedTreatmentSafety/Efficacy Study
NCT00003222CompletedNaNPhase 2Interventional2.00.0Actual40Both936...016364771159Parallel AssignmentNotDefinedOpen LabelNotDefinedTreatmentEfficacy Study
\n", "

5 rows × 33 columns

\n", "
" ], "text/plain": [ " OVERALL_STATUS COMPLETION_DATE_TYPE PHASE STUDY_TYPE \\\n", "NCT_ID \n", "NCT00000378 Completed Actual Phase 4 Interventional \n", "NCT00001656 Completed Actual Phase 4 Interventional \n", "NCT00004859 Terminated Actual Phase 3 Interventional \n", "NCT00001959 Completed Actual Phase 2 Interventional \n", "NCT00003222 Completed NaN Phase 2 Interventional \n", "\n", " NUMBER_OF_ARMS NUMBER_OF_GROUPS ENROLLMENT_TYPE ENROLLMENT \\\n", "NCT_ID \n", "NCT00000378 2.0 0.0 Actual 110 \n", "NCT00001656 2.0 0.0 Actual 25 \n", "NCT00004859 2.0 0.0 Actual 589 \n", "NCT00001959 1.0 0.0 Actual 21 \n", "NCT00003222 2.0 0.0 Actual 40 \n", "\n", " GENDER MINIMUM_AGE ... \\\n", "NCT_ID ... \n", "NCT00000378 Both 3120 ... \n", "NCT00001656 Both 364 ... \n", "NCT00004859 Both 936 ... \n", "NCT00001959 Both 936 ... \n", "NCT00003222 Both 936 ... \n", "\n", " NUMBER_OF_INVESTIGATORS PARTFLOW_COUNT_STARTED \\\n", "NCT_ID \n", "NCT00000378 0 1636 \n", "NCT00001656 0 1636 \n", "NCT00004859 0 1636 \n", "NCT00001959 0 1636 \n", "NCT00003222 0 1636 \n", "\n", " PARTFLOW_COUNT_NOT_COMPLETED PARTFLOW_COUNT_COMPLETED \\\n", "NCT_ID \n", "NCT00000378 477 1159 \n", "NCT00001656 477 1159 \n", "NCT00004859 477 1159 \n", "NCT00001959 477 1159 \n", "NCT00003222 477 1159 \n", "\n", " INTERVENTION_MODEL OBSERVATION_MODEL MASKING \\\n", "NCT_ID \n", "NCT00000378 Parallel Assignment NotDefined Double Blind \n", "NCT00001656 Parallel Assignment NotDefined Double Blind \n", "NCT00004859 Parallel Assignment NotDefined Open Label \n", "NCT00001959 Single Group Assignment NotDefined Open Label \n", "NCT00003222 Parallel Assignment NotDefined Open Label \n", "\n", " TIME_PERSPECTIVE PRIMARY_PURPOSE ENDPOINT_CLASSIFICATION \n", "NCT_ID \n", "NCT00000378 NotDefined Treatment Efficacy Study \n", "NCT00001656 NotDefined Treatment Efficacy Study \n", "NCT00004859 NotDefined Treatment Efficacy Study \n", "NCT00001959 NotDefined Treatment Safety/Efficacy Study \n", "NCT00003222 NotDefined Treatment Efficacy Study \n", "\n", "[5 rows x 33 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame.from_csv('ct_gov_results.csv')\n", "\n", "# Scrub / Edit Data\n", "df['MINIMUM_AGE'] = df['MINIMUM_AGE'].map(getAgeInWeeks)\n", "df['MAXIMUM_AGE'] = df['MAXIMUM_AGE'].map(getAgeInWeeks)\n", "\n", "df['OVERALL_STATUS'] = df['OVERALL_STATUS'].map(getNoDefined)\n", "df['PHASE'] = df['PHASE'].map(getNoDefined)\n", "df['STUDY_TYPE'] = df['STUDY_TYPE'].map(getNoDefined)\n", "df['ENROLLMENT_TYPE'] = df['ENROLLMENT_TYPE'].map(getNoDefined)\n", "df['GENDER'] = df['GENDER'].map(getNoDefined)\n", "df['INTERVENTION_TYPE'] = df['INTERVENTION_TYPE'].map(getNoDefined)\n", "df['OBSERVATION_MODEL'] = df['OBSERVATION_MODEL'].map(getNoDefined)\n", "df['MASKING'] = df['MASKING'].map(getNoDefined)\n", "df['TIME_PERSPECTIVE'] = df['TIME_PERSPECTIVE'].map(getNoDefined)\n", "df['PRIMARY_PURPOSE'] = df['PRIMARY_PURPOSE'].map(getNoDefined)\n", "df['ENDPOINT_CLASSIFICATION'] = df['ENDPOINT_CLASSIFICATION'].map(getNoDefined)\n", "\n", "df['NUMBER_OF_ARMS'] = df['NUMBER_OF_ARMS'].map(getZero)\n", "df['ENROLLMENT'] = df['ENROLLMENT'].map(getZero)\n", "df['NUMBER_OF_GROUPS'] = df['NUMBER_OF_GROUPS'].map(getZero)\n", "df['SERIOUS_TOTAL_SUB_AT_RISK'] = df['SERIOUS_TOTAL_SUB_AT_RISK'].map(getZero)\n", "df['SERIOUS_TOTAL_SUB_AFFECTED'] = df['SERIOUS_TOTAL_SUB_AFFECTED'].map(getZero)\n", "df['OTHER_TOTAL_SUB_AT_RISK'] = df['OTHER_TOTAL_SUB_AT_RISK'].map(getZero)\n", "df['OTHER_TOTAL_SUB_AFFECTED'] = df['OTHER_TOTAL_SUB_AFFECTED'].map(getZero)\n", "df['NUMBER_OF_COUNTRIES'] = df['NUMBER_OF_COUNTRIES'].map(getZero)\n", "df['NUMBER_OF_FACILITIES'] = df['NUMBER_OF_FACILITIES'].map(getZero)\n", "df['NUMBER_OF_INVESTIGATORS'] = df['NUMBER_OF_INVESTIGATORS'].map(getZero)\n", "df['PARTFLOW_COUNT_STARTED'] = df['PARTFLOW_COUNT_STARTED'].map(getZero)\n", "df['PARTFLOW_COUNT_NOT_COMPLETED'] = df['PARTFLOW_COUNT_NOT_COMPLETED'].map(getZero)\n", "df['PARTFLOW_COUNT_COMPLETED'] = df['PARTFLOW_COUNT_COMPLETED'].map(getZero)\n", "\n", "df['MAX_P_VALUE'] = df['MAX_P_VALUE'].map(getSignificance)\n", "df['MIN_P_VALUE'] = df['MIN_P_VALUE'].map(getSignificance)\n", "\n", "df.head()\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([u'CONDITION', u'ENDPOINT_CLASSIFICATION=Bio-equivalence Study',\n", " u'ENDPOINT_CLASSIFICATION=Efficacy Study',\n", " u'ENDPOINT_CLASSIFICATION=NotDefined',\n", " u'ENDPOINT_CLASSIFICATION=Pharmacodynamics Study',\n", " u'ENDPOINT_CLASSIFICATION=Pharmacokinetics Study',\n", " u'ENDPOINT_CLASSIFICATION=Pharmacokinetics/Dynamics Study',\n", " u'ENDPOINT_CLASSIFICATION=Safety Study',\n", " u'ENDPOINT_CLASSIFICATION=Safety/Efficacy Study', u'INTERVENTION_MODEL',\n", " u'INTERVENTION_MODEL=Crossover Assignment',\n", " u'INTERVENTION_MODEL=Factorial Assignment',\n", " u'INTERVENTION_MODEL=Parallel Assignment',\n", " u'INTERVENTION_MODEL=Single Group Assignment',\n", " u'INTERVENTION_TYPE=Behavioral', u'INTERVENTION_TYPE=Biological',\n", " u'INTERVENTION_TYPE=Device', u'INTERVENTION_TYPE=Dietary Supplement',\n", " u'INTERVENTION_TYPE=Drug', u'INTERVENTION_TYPE=Genetic',\n", " u'INTERVENTION_TYPE=Other', u'INTERVENTION_TYPE=Procedure',\n", " u'INTERVENTION_TYPE=Radiation', u'MASKING=Double Blind',\n", " u'MASKING=Double-Blind', u'MASKING=NotDefined', u'MASKING=Open Label',\n", " u'MASKING=Single Blind', u'OBSERVATION_MODEL=Case-Only',\n", " u'OBSERVATION_MODEL=Cohort', u'OBSERVATION_MODEL=NotDefined',\n", " u'PHASE=NotDefined', u'PHASE=Phase 1', u'PHASE=Phase 1/Phase 2',\n", " u'PHASE=Phase 2', u'PHASE=Phase 2/Phase 3', u'PHASE=Phase 3',\n", " u'PHASE=Phase 4', u'PRIMARY_PURPOSE=Basic Science',\n", " u'PRIMARY_PURPOSE=Diagnostic',\n", " u'PRIMARY_PURPOSE=Health Services Research',\n", " u'PRIMARY_PURPOSE=NotDefined', u'PRIMARY_PURPOSE=Prevention',\n", " u'PRIMARY_PURPOSE=Screening', u'PRIMARY_PURPOSE=Supportive Care',\n", " u'PRIMARY_PURPOSE=Treatment', u'STUDY_TYPE=Interventional',\n", " u'STUDY_TYPE=Observational', u'TIME_PERSPECTIVE=NotDefined',\n", " u'TIME_PERSPECTIVE=Prospective'],\n", " dtype='object')\n" ] } ], "source": [ "# Create a training and Test set\n", "enrollment_xform = getEnrollmentdf(df)\n", "\n", "a=enrollment_xform.drop('ENROLLMENT',1)\n", "b=a.drop('NUMBER_OF_ARMS',1)\n", "c=b.drop('GENDER=Female',1)\n", "d=c.drop('GENDER=Male',1)\n", "gender_removed_df=e=d.drop('GENDER=Both',1)\n", "\n", "\n", "# f=e.drop('ENDPOINT_CLASSIFICATION=Bio-equivalence Study',1)\n", "# g=f.drop('ENDPOINT_CLASSIFICATION=Efficacy Study',1)\n", "# h=g.drop('ENDPOINT_CLASSIFICATION=NotDefined',1)\n", "# i=h.drop('ENDPOINT_CLASSIFICATION=Pharmacodynamics Study',1)\n", "# j=i.drop('ENDPOINT_CLASSIFICATION=Pharmacokinetics Study',1)\n", "# k=j.drop('ENDPOINT_CLASSIFICATION=Pharmacokinetics/Dynamics Study',1)\n", "# l=k.drop('ENDPOINT_CLASSIFICATION=Safety Study',1)\n", "# m=l.drop('ENDPOINT_CLASSIFICATION=Safety/Efficacy Study',1)\n", "# n=m.drop('INTERVENTION_MODEL=Crossover Assignment',1)\n", "# o=n.drop('INTERVENTION_MODEL=Factorial Assignment',1)\n", "# p=o.drop('INTERVENTION_MODEL=Parallel Assignment',1)\n", "# q=p.drop('INTERVENTION_MODEL=Single Group Assignment',1)\n", "# r=q.drop('INTERVENTION_TYPE=Behavioral',1)\n", "# s=r.drop('INTERVENTION_TYPE=Biological',1)\n", "# t=s.drop('INTERVENTION_TYPE=Device',1)\n", "# u=t.drop('INTERVENTION_TYPE=Dietary Supplement',1)\n", "# v=u.drop('INTERVENTION_TYPE=Drug',1)\n", "# w=v.drop('INTERVENTION_TYPE=Genetic',1)\n", "# x=w.drop('INTERVENTION_TYPE=Other',1)\n", "# z=x.drop('INTERVENTION_TYPE=Procedure',1)\n", "# sd=z.drop('INTERVENTION_TYPE=Radiation',1)\n", "\n", "features_x_final=gender_removed_df\n", "\n", "print features_x_final.columns\n", "#features_x_final=enrollment_xform \n", "target_y=enrollment_xform['ENROLLMENT']\n", "enrollment_train_x, enrollment_test_x,enrollment_train_y, enrollment_test_y = train_test_split(features_x_final,target_y, test_size=0.4)\n", "#print(enrollment_xform)\n", "#enrollment_train_fit = enrollment_train.drop('ENROLLMENT',1)\n", "#print(enrollment_train_fit)\n", "#enrollment_test_fit = enrollment_test.drop('ENROLLMENT',1)\n", "#print(enrollment_test_fit)\n", "#print(enrollment_train_x)" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " [('CONDITION', -0.052623465656282391), ('ENDPOINT_CLASSIFICATION=Bio-equivalence Study', 1473.4077220110717), ('ENDPOINT_CLASSIFICATION=Efficacy Study', 5.5788731294576337), ('ENDPOINT_CLASSIFICATION=NotDefined', 579.20505944096317), ('ENDPOINT_CLASSIFICATION=Pharmacodynamics Study', -256.59654967940628), ('ENDPOINT_CLASSIFICATION=Pharmacokinetics Study', -795.02260805353853), ('ENDPOINT_CLASSIFICATION=Pharmacokinetics/Dynamics Study', -457.45498224381345), ('ENDPOINT_CLASSIFICATION=Safety Study', -416.1007223751094), ('ENDPOINT_CLASSIFICATION=Safety/Efficacy Study', -133.01679223003021), ('INTERVENTION_MODEL', 8.4241946751717478e-11), ('INTERVENTION_MODEL=Crossover Assignment', 337.23794053435233), ('INTERVENTION_MODEL=Factorial Assignment', 3516.3866986005396), ('INTERVENTION_MODEL=Parallel Assignment', 552.45548220837134), ('INTERVENTION_MODEL=Single Group Assignment', 443.76420572798861), ('INTERVENTION_TYPE=Behavioral', -1596.1303393179007), ('INTERVENTION_TYPE=Biological', -596.24964033898527), ('INTERVENTION_TYPE=Device', -2028.6251149486716), ('INTERVENTION_TYPE=Dietary Supplement', 12135.18687854545), ('INTERVENTION_TYPE=Drug', -1128.9596362999278), ('INTERVENTION_TYPE=Genetic', -1065.9252685808187), ('INTERVENTION_TYPE=Other', -1701.3664352141795), ('INTERVENTION_TYPE=Procedure', -2217.7952706546193), ('INTERVENTION_TYPE=Radiation', -1800.1351731905445), ('MASKING=Double Blind', -1683.9809641726924), ('MASKING=Double-Blind', -1193.8460898600563), ('MASKING=NotDefined', 6138.3132212548562), ('MASKING=Open Label', -1607.9106242676999), ('MASKING=Single Blind', -1652.5755429541746), ('OBSERVATION_MODEL=Case-Only', -10557.877459727495), ('OBSERVATION_MODEL=Cohort', 6077.5577815464239), ('OBSERVATION_MODEL=NotDefined', 4480.3196781810739), ('PHASE=NotDefined', -31.166863369909152), ('PHASE=Phase 1', 303.94203287575101), ('PHASE=Phase 1/Phase 2', -341.38443010037537), ('PHASE=Phase 2', -253.41711554165636), ('PHASE=Phase 2/Phase 3', -635.2741784585703), ('PHASE=Phase 3', 634.28128498967408), ('PHASE=Phase 4', 323.01926960495882), ('PRIMARY_PURPOSE=Basic Science', -6419.5711007313175), ('PRIMARY_PURPOSE=Diagnostic', -5976.833023225754), ('PRIMARY_PURPOSE=Health Services Research', -6559.5595885782486), ('PRIMARY_PURPOSE=NotDefined', -6674.4286695088285), ('PRIMARY_PURPOSE=Prevention', -5125.6150448601475), ('PRIMARY_PURPOSE=Screening', 46008.840801066464), ('PRIMARY_PURPOSE=Supportive Care', -8535.8615238672028), ('PRIMARY_PURPOSE=Treatment', -6716.97185029498), ('STUDY_TYPE=Interventional', 4480.3196781810202), ('STUDY_TYPE=Observational', -4480.3196781810511), ('TIME_PERSPECTIVE=NotDefined', -8410.4806608890085), ('TIME_PERSPECTIVE=Prospective', 8410.4806608889758)]\n", "\n", "Intercept= 8779.85537523\n", "\n", "length of test_data (15295, 50)\n", "length of predicted_data (15295L,)\n", "\n", "Predicted Values= [-427.99513604 844.34937109 -734.62952914 ..., -185.36488066 989.25985232\n", " -36.19223812]\n", "Root Mean Square Error= 3046.69264922\n" ] } ], "source": [ "linreg=LinearRegression()\n", "linreg.fit(enrollment_train_x, enrollment_train_y)\n", "#print(\"Coeficient=\"+linreg.coef_)\n", "print zip(features_x_final,linreg.coef_)\n", "print\"\\nIntercept=\",linreg.intercept_\n", "predicted_value=linreg.predict(enrollment_test_x)\n", "print \"\\nlength of test_data\",enrollment_test_x.shape\n", "print \"length of predicted_data\",predicted_value.shape\n", "print \"\\nPredicted Values=\",predicted_value\n", "print \"Root Mean Square Error=\",np.sqrt(mean_squared_error(enrollment_test_y, predicted_value)) \n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }