{"id":1324,"date":"2025-01-05T18:51:33","date_gmt":"2025-01-05T18:51:33","guid":{"rendered":"https:\/\/cyberenlightener.com\/?page_id=1324"},"modified":"2025-10-03T09:56:25","modified_gmt":"2025-10-03T08:56:25","slug":"predictive-analytics","status":"publish","type":"page","link":"https:\/\/cyberenlightener.com\/?page_id=1324","title":{"rendered":"Predictive-Analytics"},"content":{"rendered":"\n<div class=\"wp-block-columns are-vertically-aligned-top is-layout-flex wp-container-core-columns-is-layout-1 wp-block-columns-is-layout-flex\">\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\">\n<ul class=\"wp-block-list\">\n<li><strong>Introduction to Analytics<\/strong> &#8211;\n<ul class=\"wp-block-list\">\n<li><strong><a href=\"https:\/\/www.ibm.com\/think\/topics\/business-analytics\">Business Analytics<\/a> , Models-<a href=\"https:\/\/en.wikipedia.org\/wiki\/Predictive_modelling\" data-type=\"link\" data-id=\"https:\/\/en.wikipedia.org\/wiki\/Predictive_modelling\">Predictive<\/a>, <a href=\"https:\/\/www.sciencedirect.com\/topics\/mathematics\/descriptive-model\" data-type=\"link\" data-id=\"https:\/\/www.sciencedirect.com\/topics\/mathematics\/descriptive-model\">Descriptive<\/a>, &amp; <a href=\"#AprioriAlgorithm\">Decision Models(AprioriAlgorithm)<\/a>,<\/strong><\/li>\n\n\n\n<li>Analytical Techniques<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>Data Prep &amp; Tuning<\/strong>&#8211;\n<ul class=\"wp-block-list\">\n<li><strong>Data Transformations (<a href=\"https:\/\/www.sciencedirect.com\/topics\/computer-science\/single-predictor\">Single<\/a> &amp; <a href=\"https:\/\/www.sciencedirect.com\/topics\/mathematics\/multiple-predictor\">Multiple Predictors<\/a>)<\/strong><\/li>\n\n\n\n<li><strong><a href=\"#miss\" data-type=\"internal\" data-id=\"#miss\">Handling Missing Values <\/a>(<a href=\"#rem\" data-type=\"internal\" data-id=\"#rem\">Removal<\/a>, <a href=\"#imput\" data-type=\"internal\" data-id=\"#imput\">Imputation<\/a>, <a 
href=\"https:\/\/www.naukri.com\/code360\/library\/binning-in-data-mining\" data-type=\"internal\" data-id=\"#bin\">Binning<\/a>) Model Tuning, <a href=\"https:\/\/topepo.github.io\/caret\/data-splitting.html\">Data Splitting<\/a> &amp; <a href=\"https:\/\/www.kdnuggets.com\/2023\/02\/role-resampling-techniques-data-science.html\">Resampling<\/a><\/strong><\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\">\n<p><strong>Predictive Modeling <\/strong>&#8211;<\/p>\n\n\n\n<p><strong><a href=\"#Propensity\" data-type=\"internal\" data-id=\"#Propensity\">Propensity<\/a>, <a href=\"#cluster\" data-type=\"internal\" data-id=\"#cluster\">Cluster<\/a>,<\/strong> &amp; <strong><a href=\"#col\" data-type=\"internal\" data-id=\"#col\">Collaborative Filtering Models,<\/a><\/strong> Statistical Modeling<\/p>\n\n\n\n<p><strong>Regression Model Comparison &#8211;<\/strong>  <\/p>\n\n\n\n<p><strong><a href=\"#lin\" data-type=\"internal\" data-id=\"#lin\">Linear<\/a><\/strong>(Fitting a st line using the least square method), <strong><a href=\"#metricsreg\" data-type=\"internal\" data-id=\"#metricsreg\">Measurement of metrics(Linear Regression)<\/a><\/strong>,<strong><a href=\"#multiple\" data-type=\"internal\" data-id=\"#multiple\">Multiple Linear Regression,<\/a><\/strong> <strong><a href=\"#Logistic\" data-type=\"internal\" data-id=\"#Logistic\">Logistic Regression <\/a><\/strong>&amp; <strong><a href=\"#NonlinearReg\">Non-Linear Regression<\/a><\/strong><\/p>\n\n\n\n<p><a href=\"#Regression\" data-type=\"internal\" data-id=\"#Regression\"><strong>Regression Trees<\/strong><\/a> &amp; Rules<\/p>\n<\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\">\n<ul class=\"wp-block-list\">\n<li id=\"non\"><strong>Classification Model Comparison<\/strong>&#8211; Classification Model Performance &#8211; <strong><a href=\"#SVM\" data-type=\"internal\" data-id=\"#SVM\">Linear(Support 
Vector Machine-SVM<\/a><\/strong> &amp; Non-Linear Classification(<strong><a href=\"#nonlinearsvm\" data-type=\"internal\" data-id=\"#nonlinearsvm\">SVM-Non Linear<\/a><\/strong>)(<strong><a href=\"#NN-A\">NN-Part-A<\/a><\/strong> &amp; <strong><a href=\"#NN-B\" data-type=\"internal\" data-id=\"#NN-B\">NN-Part-B<\/a><\/strong>, <strong><a href=\"https:\/\/mattmazur.com\/2015\/03\/17\/a-step-by-step-backpropagation-example\/\">Backpropagation<\/a>,<\/strong> Classification Trees(<strong><a href=\"#C4.5\" data-type=\"internal\" data-id=\"#C4.5\">Decision Tree ; C4.5<\/a><\/strong> &amp; Rules- <strong><a href=\"#metrics\" data-type=\"internal\" data-id=\"#metrics\">Model Evaluation(Classification)<\/a><\/strong><\/li>\n\n\n\n<li><strong>Addressing Class Imbalance <\/strong>&#8211; <strong><a href=\"#imbalance\" data-type=\"internal\" data-id=\"#imbalance\">Impact of Class Imbalance<\/a>, <\/strong>Model Tuning &amp; Adjustments <a href=\"#sampling\" data-type=\"internal\" data-id=\"#sampling\"><strong>Sampling Methods<\/strong><\/a> * Cost-Sensitive Training * Predictor Importance &amp; Model Performance Factors. 
<strong><a href=\"#time\" data-type=\"internal\" data-id=\"#time\">Time Series Analysis<\/a><\/strong><\/li>\n<\/ul>\n<\/div>\n<\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>AprioriAlgorithm<\/strong><\/h2>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-1 is-layout-flex wp-block-gallery-is-layout-flex\"><\/figure>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Apriori-Algorithm-Tutorial-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Apriori Algorithm Tutorial (1).\"><\/object><a id=\"wp-block-file--media-3521f053-2b97-44bf-a571-540de0c20bc2\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Apriori-Algorithm-Tutorial-1.pdf\">Apriori Algorithm Tutorial (1)<\/a><\/div>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-2 is-layout-flex wp-block-gallery-is-layout-flex\"><\/figure>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/Single-vs-Multiple-Predictors.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Single vs Multiple Predictors.\"><\/object><a id=\"wp-block-file--media-0593edf3-741c-49b7-bd06-f707f3499119\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/Single-vs-Multiple-Predictors.pdf\">Single vs Multiple Predictors<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/Single-vs-Multiple-Predictors.pdf\" class=\"wp-block-file__button wp-element-button\" download 
aria-describedby=\"wp-block-file--media-0593edf3-741c-49b7-bd06-f707f3499119\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>VARIOUS NORMALIZATION TECHNIQUES<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/SSROY_1-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of SSROY_1 (1).\"><\/object><a id=\"wp-block-file--media-55092261-a17c-4d6d-8b67-c1c2e040d5f3\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/SSROY_1-1.pdf\">SSROY_1 (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/SSROY_1-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-55092261-a17c-4d6d-8b67-c1c2e040d5f3\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Transformations to Resolve Skewness<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/SSROY_2-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of SSROY_2 (1).\"><\/object><a id=\"wp-block-file--media-ea628c37-86a8-4c03-b147-a54c61e9bf7a\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/SSROY_2-1.pdf\">SSROY_2 (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/SSROY_2-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-ea628c37-86a8-4c03-b147-a54c61e9bf7a\">Download<\/a><\/div>\n\n\n\n<p><strong>Dealing with Missing Values<\/strong><\/p>\n\n\n\n<p id=\"miss\">In many situations, certain predictors lack values for specific samples. 
These missing values might be <strong>structurally missing<\/strong>, such as the number of children a man has biologically given birth to. Alternatively, some data points might be unavailable or were not recorded during the model development phase. Understanding the reasons behind missing values is crucial. The first step is to determine whether the missing data pattern is connected to the outcome. This phenomenon, known as &#8220;informative missingness,&#8221; implies that the absence of data itself carries meaningful insights. Such <strong>informative missingness<\/strong> can introduce substantial bias into the model. For instance, an earlier example discussed predicting a patient\u2019s response to a drug. If the drug is highly ineffective or causes severe side effects, patients may skip doctor visits or withdraw from the study. Here, the likelihood of missing values is directly linked to the treatment&#8217;s impact.<\/p>\n\n\n\n<p><strong>Informative missingness<\/strong> is also evident in customer ratings, where individuals are more likely to provide feedback when they hold strong opinions\u2014whether positive or negative. This often leads to a polarized dataset, with few ratings falling in the middle range. A notable instance of this occurred during the Netflix Prize competition, where participants aimed to predict user preferences for movies based on prior ratings. The &#8220;Napoleon Dynamite Effect&#8221; posed challenges for many competitors, as individuals who rated the movie tended to either love it or hate it, creating a <strong>highly skewed dataset<\/strong>.<\/p>\n\n\n\n<p>It is important to distinguish between <strong>missing data and censored data<\/strong>, as the two are not the same. Censored data refers to situations where the exact value is unknown, but some information about it is still available. For instance, a company that rents DVDs by mail might include the duration a customer keeps a movie in its models. 
If the movie has not been returned, the exact time cannot be determined, but it is known to be at least as long as the current duration. Similarly, censored data is common in laboratory measurements, where certain tests cannot detect values below a specific threshold. In these cases, it is clear the value is less than the detection limit, but its precise amount remains unmeasured.<\/p>\n\n\n\n<p>How are <strong>censored data<\/strong> handled differently from <strong>missing data<\/strong>? In traditional statistical modeling aimed at interpretation or inference, censoring is typically addressed formally by making assumptions about the underlying mechanism. However, in predictive modeling, censored data are often treated as if they were simply missing, or the censored value itself is used as a substitute. For example, when a measurement falls below the detection limit, the limit value may be used as a stand-in for the actual measurement. Alternatively, a random value between zero and the detection limit might be assigned to represent the censored data. <\/p>\n\n\n\n<p>There are cases where the missing values might be concentrated in specific samples. For large data sets, removal of samples based on missing values is not a problem, assuming that the <strong>missingness<\/strong> is not informative. In smaller data sets, there is a steep price in removing samples; some of the alternative approaches described below may be more appropriate. If we do not remove the missing data, there are two general approaches. First, a few predictive models, especially tree-based techniques, can specifically account for missing data. Alternatively, missing data can be imputed. In this case, we can use information in the training set predictors to, in essence, estimate the values of other predictors. This amounts to a predictive model within a predictive model. 
<\/p>\n\n\n\n<p><strong>Imputation <\/strong>has been widely explored in statistical research, primarily in the context of developing accurate hypothesis testing procedures when faced with missing data. However, this differs from the objectives of predictive modeling, where the focus is on improving prediction accuracy rather than ensuring valid inferences. Research specifically addressing imputation in predictive models is relatively limited. For instance, Saar-Tsechansky and Provost (2007b) investigated how different models handle missing values, while <strong>Jerez et al. (2010)<\/strong> examined various imputation techniques for a specific dataset.<\/p>\n\n\n\n<p>As previously mentioned,<strong> imputation adds an extra layer of modeling<\/strong>, where missing values in predictors are estimated based on information from other predictors. A typical method involves creating an imputation model for each predictor using the training set. Before training the primary model or making predictions on new data, these imputation models are used to fill in missing values. It is important to acknowledge that this additional layer of modeling introduces uncertainty. When resampling techniques are employed to select tuning parameters or assess performance, it is essential to incorporate the imputation process within the resampling framework. While this approach increases the computational demands of model building, it provides more accurate performance estimates.<\/p>\n\n\n\n<p>If only a few predictors are affected by missing values, an exploratory analysis to investigate relationships among predictors can be advantageous. Techniques such as visualization or <strong>PCA <\/strong>can help uncover strong correlations between variables. 
For predictors with missing values that are closely correlated with those having few missing values, a focused imputation model can often produce effective outcomes, as demonstrated in the example below.<\/p>\n\n\n\n<p>A commonly used method for imputation is the <strong>K-nearest neighbor (KNN) <\/strong>model. In this approach, missing values for a new sample are estimated by identifying the samples in the training set that are &#8220;closest&#8221; to it and averaging the values of these neighboring points to fill in the missing data. Troyanskaya et al. (2001) explored this method, particularly for high-dimensional datasets with small sample sizes.<\/p>\n\n\n\n<p>One advantage of the KNN approach is that the imputed values remain within the range of the training set, preserving the data&#8217;s original scale. However, a notable drawback is the need to reference the entire training set each time a missing value is imputed, which can be computationally demanding. Additionally, this method requires tuning key parameters, such as the number of neighbors and the technique used to determine proximity.  <\/p>\n\n\n\n<figure class=\"wp-block-image aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"702\" height=\"485\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/Imputed.png\" alt=\"\" class=\"wp-image-1325\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/Imputed.png 702w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/Imputed-300x207.png 300w\" sizes=\"auto, (max-width: 702px) 100vw, 702px\" \/><\/figure>\n\n\n\n<p>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; The determination of <strong>&#8220;closeness&#8221;<\/strong> between two points is another important aspect of the K-nearest neighbor method. However, Troyanskaya et al. 
(2001) observed that the nearest neighbor technique demonstrates considerable robustness to variations in these tuning parameters and the proportion of missing data. <\/p>\n\n\n\n<p>A predictor measuring cell perimeter was previously used to illustrate<strong> skewness. <\/strong>To demonstrate the <strong>K-nearest neighbor method, a 5-nearest neighbor<\/strong> model was built using the training set values. Missing values were artificially introduced into 50 test set cell perimeter values, which were subsequently imputed using this model. The results are displayed in the left-hand panel of above Fig, where the scatter plot illustrates the imputation&#8217;s effectiveness. The model achieved a high correlation of 0.91 between the actual and imputed values, indicating strong predictive performance. Alternatively, a simpler strategy can be applied for imputing cell perimeter values. Cell fiber length, another predictor related to cell size, has a very high correlation (0.99) with cell perimeter data. By employing a straightforward linear regression model, it is possible to predict the missing values. The results of this method are shown in the right-hand panel of above Fig. where the correlation between actual and imputed values is 0.85.<\/p>\n\n\n\n<p>Here&#8217;s a simple example of how to handle missing values in Python using the popular <code>pandas<\/code> library. 
In this example, we will create a dataset with missing values and then handle them using imputation (filling missing values with the mean of the column).<\/p>\n\n\n\n<h6 class=\"wp-block-heading\">Step-by-Step Example:<\/h6>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Create a DataFrame with missing values<\/strong>.<\/li>\n\n\n\n<li><strong>Impute missing values<\/strong> by filling them with the mean of the respective columns<\/li>\n<\/ol>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\n\n# Step 1: Create a DataFrame with missing values\ndata = {\n    'Name': &#91;'Alice', 'Bob', 'Charlie', 'David', 'Eva'],\n    'Age': &#91;25, None, 30, None, 22],\n    'Salary': &#91;50000, 60000, None, 55000, 70000]\n}\n\ndf = pd.DataFrame(data)\n\nprint(\"Original DataFrame with missing values:\")\nprint(df)\n\n# Step 2: Impute missing values with the mean of each column\ndf&#91;'Age'].fillna(df&#91;'Age'].mean(), inplace=True)\ndf&#91;'Salary'].fillna(df&#91;'Salary'].mean(), inplace=True)\n\nprint(\"\\nDataFrame after imputation:\")\nprint(df)\n\nOriginal DataFrame with missing values:\n      Name   Age   Salary\n0    Alice  25.0  50000.0\n1      Bob   NaN  60000.0\n2  Charlie  30.0      NaN\n3    David   NaN  55000.0\n4      Eva  22.0  70000.0\n\nDataFrame after imputation:\n      Name   Age   Salary\n0    Alice  25.0  50000.0\n1      Bob  25.67  60000.0\n2  Charlie  30.0  58750.0\n3    David  25.67  55000.0\n4      Eva  22.0  70000.0<\/code><\/pre>\n\n\n\n<h6 class=\"wp-block-heading\">Explanation:<\/h6>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Creating the DataFrame<\/strong>: The <code>Age<\/code> and <code>Salary<\/code> columns have some missing values (<code>NaN<\/code>).<\/li>\n\n\n\n<li><strong>Imputation<\/strong>: The missing values in <code>Age<\/code> are filled with the column&#8217;s mean (approximately 25.67), and the missing values in <code>Salary<\/code> are filled with the mean of that column (58750).<\/li>\n<\/ol>\n\n\n\n<p>Here\u2019s an 
extended version of the Python code with additional methods for handling missing values, including:<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Imputation using Mean, Median, and Mode<\/strong>.<\/li>\n\n\n\n<li><strong>Dropping rows or columns with missing values<\/strong>.<\/li>\n\n\n\n<li><strong>Filling missing values with forward-fill or backward-fill<\/strong>.<\/li>\n<\/ol>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\n\n# Step 1: Create a DataFrame with missing values\ndata = {\n    'Name': &#91;'Alice', 'Bob', 'Charlie', 'David', 'Eva'],\n    'Age': &#91;25, None, 30, None, 22],\n    'Salary': &#91;50000, 60000, None, 55000, 70000],\n    'Department': &#91;'HR', None, 'Engineering', 'Marketing', 'Finance']\n}\n\ndf = pd.DataFrame(data)\n\nprint(\"Original DataFrame with missing values:\")\nprint(df)\n\n# Step 2: Impute missing values with different strategies\n\n# Impute missing values in the 'Age' column with the mean\ndf&#91;'Age'].fillna(df&#91;'Age'].mean(), inplace=True)\nprint(\"\\nDataFrame after filling missing 'Age' with mean:\")\nprint(df)\n\n# Impute missing values in the 'Salary' column with the median\ndf&#91;'Salary'].fillna(df&#91;'Salary'].median(), inplace=True)\nprint(\"\\nDataFrame after filling missing 'Salary' with median:\")\nprint(df)\n\n# Impute missing values in the 'Department' column with the mode (most frequent value)\ndf&#91;'Department'].fillna(df&#91;'Department'].mode()&#91;0], inplace=True)\nprint(\"\\nDataFrame after filling missing 'Department' with mode:\")\nprint(df)\n\n# Step 3: Drop rows with missing values\ndf_dropped_rows = df.dropna()\nprint(\"\\nDataFrame after dropping rows with missing values:\")\nprint(df_dropped_rows)\n\n# Step 4: Drop columns with missing values\ndf_dropped_columns = df.dropna(axis=1)\nprint(\"\\nDataFrame after dropping columns with missing values:\")\nprint(df_dropped_columns)\n\n# Step 5: Forward-fill (propagate last valid value)\ndf_forward_fill = 
df.fillna(method='ffill')\nprint(\"\\nDataFrame after forward-filling missing values:\")\nprint(df_forward_fill)\n\n# Step 6: Backward-fill (use next valid value)\ndf_backward_fill = df.fillna(method='bfill')\nprint(\"\\nDataFrame after backward-filling missing values:\")\nprint(df_backward_fill)\n<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>Output : \nOriginal DataFrame with missing values:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob   NaN  60000.0         None\n2  Charlie  30.0      NaN  Engineering\n3    David   NaN  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance\n\nDataFrame after filling missing 'Age' with mean:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob  25.67  60000.0         None\n2  Charlie  30.0      NaN  Engineering\n3    David  25.67  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance\n\nDataFrame after filling missing 'Salary' with median:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob  25.67  60000.0         None\n2  Charlie  30.0  57500.0  Engineering\n3    David  25.67  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance\n\nDataFrame after filling missing 'Department' with mode:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob  25.67  60000.0  Engineering\n2  Charlie  30.0  57500.0  Engineering\n3    David  25.67  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance\n\nDataFrame after dropping rows with missing values:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob  25.67  60000.0  Engineering\n2  Charlie  30.0  57500.0  Engineering\n3    David  25.67  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance\n\nDataFrame after dropping columns with missing values:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob  25.67  60000.0  Engineering\n2  Charlie  30.0  57500.0  Engineering\n3    David  25.67  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance\n\nDataFrame after forward-filling missing values:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob  25.67  60000.0  Engineering\n2  Charlie  30.0  57500.0  Engineering\n3    David  25.67  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance\n\nDataFrame after backward-filling missing values:\n      Name   Age   Salary   Department\n0    Alice  25.0  50000.0          HR\n1      Bob  25.67  60000.0  Engineering\n2  Charlie  30.0  57500.0  Engineering\n3    David  25.67  55000.0    Marketing\n4      Eva  22.0  70000.0      Finance<\/code><\/pre>\n\n\n\n<h6 class=\"wp-block-heading\">Explanation of Additions:<\/h6>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Filling with Mean, Median, and Mode<\/strong>: Missing values in <code>Age<\/code>, <code>Salary<\/code>, and <code>Department<\/code> are filled with the mean (25.67), median (57500), and mode ('Engineering', the alphabetically first of the equally frequent values) of their respective columns, respectively.<\/li>\n\n\n\n<li><strong>Dropping Rows or Columns<\/strong>: Rows or columns containing missing values are removed using the <code>dropna()<\/code> method (here nothing is dropped, because all missing values were already imputed in the preceding steps).<\/li>\n\n\n\n<li><strong>Forward-fill and Backward-fill<\/strong>: The <code>fillna(method='ffill')<\/code> method propagates the last valid value to fill in the missing values, while <code>fillna(method='bfill')<\/code> uses the next valid value (since the DataFrame is already complete at this point, these calls leave it unchanged).<\/li>\n<\/ol>\n\n\n\n<p><strong>Removing predictors<\/strong> before model training can provide multiple benefits. Firstly, a reduction in the number of predictors reduces both the computational cost and the complexity of the model, making it faster and more efficient to train.<\/p>\n\n\n\n<h5 class=\"wp-block-heading has-text-align-center\"><strong>Removing Predictors<\/strong><\/h5>\n\n\n\n<p id=\"rem\">Secondly, when two predictors are <strong>highly correlated<\/strong>, it suggests they are capturing similar underlying information. 
In such cases, removing one predictor should not harm the model&#8217;s performance but might lead to a simpler and more interpretable model.<\/p>\n\n\n\n<p><strong>Thirdly,<\/strong> certain models might struggle with predictors exhibiting degenerate distributions or those with only a few unique values. In these situations, removing these predictors can lead to improved model stability and performance.<\/p>\n\n\n\n<p>For example, consider a predictor variable that describes the frequency of keyword occurrences across several documents. In a dataset with 531 documents, most of the documents (523) do not contain the keyword, while only a small number have a few occurrences. Such a variable would have little impact on certain models, like tree-based models, which would ignore it during splits. However, models like linear regression could face issues with these kinds of predictors due to computational difficulties. In these cases, the uninformative variable can be safely removed.<\/p>\n\n\n\n<p>Moreover, some predictors may have only a small number of unique values that occur infrequently. These &#8220;near-zero variance predictors&#8221; might have a dominant value in most cases, but very few other values, which can disproportionately influence the model.<\/p>\n\n\n\n<p>In scenarios such as text mining, where keyword counts are recorded across a large number of documents, predictors with very low variance could be problematic. For example, a keyword might appear in only a few documents, leading to an imbalanced distribution of values. A potential distribution could look like this: of 531 documents, 523 have zero occurrences of the keyword, 6 documents have two occurrences, and only 1 or 2 documents contain more. 
This type of distribution is often skewed, where one value occurs much more frequently than others, resulting in an imbalance that could negatively affect model performance.<\/p>\n\n\n\n<p>To detect near-zero variance predictors, it&#8217;s crucial to check the proportion of unique values relative to the sample size. In the document example, only four unique counts exist across 531 documents, which results in 0.8% unique values. Although a small percentage of unique values in itself may not be an issue, the disproportionate frequency of certain values can signal that the predictor is unhelpful. A rule of thumb for diagnosing such issues is to examine the ratio of the most frequent value&#8217;s occurrences to the second most frequent. If this ratio is too large, the predictor is likely to be <strong>near-zero variance<\/strong>.<\/p>\n\n\n\n<p>By detecting and removing such predictors, the model can be made more efficient, reducing the risk of overfitting and improving interpretability. Additionally, this approach helps avoid numerical issues in certain models that are sensitive to degenerate predictors.<\/p>\n\n\n\n<figure class=\"wp-block-image aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"664\" height=\"325\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/Predictor.png\" alt=\"\" class=\"wp-image-1329\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/Predictor.png 664w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/Predictor-300x147.png 300w\" sizes=\"auto, (max-width: 664px) 100vw, 664px\" \/><\/figure>\n\n\n\n<p>The fraction of unique values over the sample size is low (say 10 %). <\/p>\n\n\n\n<p>\u2022 The ratio of the frequency of the most prevalent value to the frequency of the second most prevalent value is large (say around 20). 
If both of these criteria are true and the model in question is susceptible to this type of predictor, it may be advantageous to remove the variable from the model.<\/p>\n\n\n\n<h5 class=\"wp-block-heading has-text-align-center\"><strong>Between-Predictor Correlations<\/strong><\/h5>\n\n\n\n<p><strong>Collinearity <\/strong>refers to a situation where predictor variables are highly correlated. When multiple predictors show strong correlations, it\u2019s known as multicollinearity. For instance, in cell segmentation data, predictors like cell perimeter, width, and length are correlated, reflecting cell size and morphology (e.g., roughness).<\/p>\n\n\n\n<p>A correlation matrix visually shows the strength of pairwise correlations, with dark blue indicating strong positive correlations, dark red for negative correlations, and white for no relationship. Grouping collinear predictors using clustering techniques helps identify correlated clusters.<\/p>\n\n\n\n<p>If many predictors make visual inspection difficult, techniques like PCA can highlight major correlations, such as when the first principal component captures most of the variance, indicating redundancy in predictors.<\/p>\n\n\n\n<figure class=\"wp-block-image aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"664\" height=\"530\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/collinerity.png\" alt=\"\" class=\"wp-image-1330\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/collinerity.png 664w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/collinerity-300x239.png 300w\" sizes=\"auto, (max-width: 664px) 100vw, 664px\" \/><\/figure>\n\n\n\n<p>                                               Reference: Kuhn, M. &#8220;Applied predictive modeling.&#8221; (2013). 
Page No : 45-47<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\ud83d\udd0d <strong>Between-Predictor Heuristic Approach<\/strong><\/h3>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Betweeness_Predictor-SSROY25.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Betweeness_Predictor-SSROY25.\"><\/object><a id=\"wp-block-file--media-20454026-af65-48df-b7fd-8829f234cf78\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Betweeness_Predictor-SSROY25.pdf\">Betweeness_Predictor-SSROY25<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Betweeness_Predictor-SSROY25.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-20454026-af65-48df-b7fd-8829f234cf78\">Download<\/a><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><\/h3>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"imput\"><strong>Missing Value Imputation by KNN<\/strong><\/h2>\n\n\n\n<div class=\"wp-block-group is-layout-constrained wp-block-group-is-layout-constrained\">\n<div class=\"wp-block-columns is-layout-flex wp-container-core-columns-is-layout-2 wp-block-columns-is-layout-flex\">\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:100%\">\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-3 is-layout-flex wp-block-gallery-is-layout-flex\">\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"818\" height=\"1024\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-818x1024.jpeg\" 
alt=\"\" class=\"wp-image-1521\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-818x1024.jpeg 818w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-240x300.jpeg 240w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-768x961.jpeg 768w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42.jpeg 1023w\" sizes=\"auto, (max-width: 818px) 100vw, 818px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"849\" height=\"1024\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-1-849x1024.jpeg\" alt=\"\" class=\"wp-image-1523\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-1-849x1024.jpeg 849w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-1-249x300.jpeg 249w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-1-768x927.jpeg 768w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-1.jpeg 1061w\" sizes=\"auto, (max-width: 849px) 100vw, 849px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"854\" height=\"1024\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-2-854x1024.jpeg\" alt=\"\" class=\"wp-image-1522\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-2-854x1024.jpeg 854w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-2-250x300.jpeg 250w, 
https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-2-768x921.jpeg 768w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/WhatsApp-Image-2025-01-17-at-11.08.42-2.jpeg 1067w\" sizes=\"auto, (max-width: 854px) 100vw, 854px\" \/><\/figure>\n<\/figure>\n<\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\"><\/div>\n<\/div>\n<\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/DATA_PREPROCESSING-1SSROY.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of DATA_PREPROCESSING-1SSROY.\"><\/object><a id=\"wp-block-file--media-bc5db65c-63aa-4c71-b81b-a891b49a1b03\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/DATA_PREPROCESSING-1SSROY.pdf\">DATA_PREPROCESSING-1SSROY<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/DATA_PREPROCESSING-1SSROY.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-bc5db65c-63aa-4c71-b81b-a891b49a1b03\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Data-preprocessing-tutorial-SSROY-2025LINREG-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Data preprocessing tutorial-SSROY-2025LINREG (1).\"><\/object><a id=\"wp-block-file--media-06d713b8-12f0-479f-8660-f7b6230c0459\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Data-preprocessing-tutorial-SSROY-2025LINREG-1.pdf\">Data preprocessing tutorial-SSROY-2025LINREG (1)<\/a><a 
href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Data-preprocessing-tutorial-SSROY-2025LINREG-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-06d713b8-12f0-479f-8660-f7b6230c0459\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Data Splitting by InterQuartile Range(IQR)<\/strong><\/h2>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"500\" height=\"545\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/500px-Boxplot_vs_PDF.svg_.png\" alt=\"\" class=\"wp-image-2066\" style=\"width:625px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/500px-Boxplot_vs_PDF.svg_.png 500w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/500px-Boxplot_vs_PDF.svg_-275x300.png 275w\" sizes=\"auto, (max-width: 500px) 100vw, 500px\" \/><\/figure>\n\n\n\n<p>By Jhguch at en.wikipedia, CC BY-SA 2.5, https:\/\/commons.wikimedia.org\/w\/index.php?curid=14524285<\/p>\n\n\n\n<p>These quartiles are denoted by&nbsp;<em>Q<\/em><sub>1<\/sub>&nbsp;(also called the lower quartile),&nbsp;<em>Q<\/em><sub>2<\/sub>&nbsp;(the&nbsp;<a href=\"https:\/\/en.wikipedia.org\/wiki\/Median\">median<\/a>), and&nbsp;<em>Q<\/em><sub>3<\/sub>&nbsp;(also called the upper quartile).&nbsp;<\/p>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/IQR.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of IQR.\"><\/object><a id=\"wp-block-file--media-c6529eff-13b9-4446-9e0f-3b824280a43f\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/IQR.pdf\">IQR<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/IQR.pdf\" class=\"wp-block-file__button 
wp-element-button\" download aria-describedby=\"wp-block-file--media-c6529eff-13b9-4446-9e0f-3b824280a43f\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Principal Component Analysis<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Covariance-matrix-calculation-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Covariance matrix calculation (1).\"><\/object><a id=\"wp-block-file--media-10a72544-b88c-49cb-9f77-8c81cbb21b19\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Covariance-matrix-calculation-1.pdf\">Covariance matrix calculation (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Covariance-matrix-calculation-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-10a72544-b88c-49cb-9f77-8c81cbb21b19\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/PCA-1-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of PCA-1.\"><\/object><a id=\"wp-block-file--media-e955bdea-514e-4225-9343-affb0e204414\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/PCA-1-1.pdf\">PCA-1<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/PCA-1-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-e955bdea-514e-4225-9343-affb0e204414\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden 
class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/PCA_SSROY_MATH_BACKGROUND.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of PCA_SSROY_MATH_BACKGROUND.\"><\/object><a id=\"wp-block-file--media-d77b0327-482b-4918-b47d-8ca5e706658f\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/PCA_SSROY_MATH_BACKGROUND.pdf\">PCA_SSROY_MATH_BACKGROUND<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/PCA_SSROY_MATH_BACKGROUND.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-d77b0327-482b-4918-b47d-8ca5e706658f\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"696\" height=\"438\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/pca.png\" alt=\"\" class=\"wp-image-2040\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/pca.png 696w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/pca-300x189.png 300w\" sizes=\"auto, (max-width: 696px) 100vw, 696px\" \/><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Solved Examples : PCA<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/pca_4thAug.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of pca_4thAug.\"><\/object><a id=\"wp-block-file--media-4be1419d-ee04-4f7d-b1d5-01577d4b0cd9\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/pca_4thAug.pdf\">pca_4thAug<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/pca_4thAug.pdf\" class=\"wp-block-file__button wp-element-button\" download 
aria-describedby=\"wp-block-file--media-4be1419d-ee04-4f7d-b1d5-01577d4b0cd9\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>PCA with Kernel<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/PCA_KERNEL.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of PCA_KERNEL.\"><\/object><a id=\"wp-block-file--media-5ebf4090-8956-4980-8a96-1316c17ef277\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/PCA_KERNEL.pdf\">PCA_KERNEL<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/PCA_KERNEL.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-5ebf4090-8956-4980-8a96-1316c17ef277\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-image size-large is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"469\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/pcakernelssroy-1024x469.png\" alt=\"\" class=\"wp-image-2286\" style=\"width:1200px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/pcakernelssroy-1024x469.png 1024w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/pcakernelssroy-300x137.png 300w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/pcakernelssroy-768x352.png 768w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/pcakernelssroy.png 1360w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<h3 class=\"wp-block-heading has-text-align-center\">Propensity: Concept and Application <\/h3>\n\n\n\n<p id=\"Propensity\"><strong>Definition of Propensity<\/strong><br>Propensity refers to an inherent tendency or likelihood of something happening, generally related to the 
<strong>probability of a particular event occurring. <\/strong>The term is commonly used in <strong>statistics, economics, psychology, <\/strong>and various other disciplines to describe the tendency or probability that a certain outcome or event will happen given certain conditions or factors. In the realm of probability and statistics, propensity is often used to refer to the expected behavior or outcome based on a series of probabilities and conditions.<\/p>\n\n\n\n<p>In other words, propensity can be thought of as a measure of how likely an event is to occur under a specific set of circumstances.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Types of Propensity<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Statistical Propensity<\/strong>: This type refers to the probability that an event will occur given certain data or information. It involves calculating the likelihood of events based on historical or observed data.<\/li>\n\n\n\n<li><strong>Economic Propensity<\/strong>: In economics, the term &#8220;propensity&#8221; is often used to refer to the &#8220;marginal propensity,&#8221; which is the likelihood that an individual or household will spend or save additional income.<\/li>\n\n\n\n<li><strong>Behavioral Propensity<\/strong>: In psychology and behavioral economics, propensity refers to an individual&#8217;s inclination or tendency to perform a particular action or decision.<\/li>\n<\/ol>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-4 is-layout-flex wp-block-gallery-is-layout-flex\">\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"494\" height=\"491\" data-id=\"1550\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/1-6.jpg\" alt=\"\" class=\"wp-image-1550\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/1-6.jpg 494w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/1-6-300x298.jpg 300w, 
https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/1-6-150x150.jpg 150w\" sizes=\"auto, (max-width: 494px) 100vw, 494px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"665\" height=\"468\" data-id=\"1549\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/2-5.jpg\" alt=\"\" class=\"wp-image-1549\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/2-5.jpg 665w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/2-5-300x211.jpg 300w\" sizes=\"auto, (max-width: 665px) 100vw, 665px\" \/><\/figure>\n<\/figure>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-5 is-layout-flex wp-block-gallery-is-layout-flex\">\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"665\" height=\"468\" data-id=\"1552\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3-4.jpg\" alt=\"\" class=\"wp-image-1552\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3-4.jpg 665w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3-4-300x211.jpg 300w\" sizes=\"auto, (max-width: 665px) 100vw, 665px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"554\" height=\"388\" data-id=\"1553\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/4-3.jpg\" alt=\"\" class=\"wp-image-1553\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/4-3.jpg 554w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/4-3-300x210.jpg 300w\" sizes=\"auto, (max-width: 554px) 100vw, 554px\" \/><\/figure>\n<\/figure>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-6 is-layout-flex wp-block-gallery-is-layout-flex\"><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\"><\/h3>\n\n\n\n<h3 
class=\"wp-block-heading\">Propensity in Predictive Modeling<\/h3>\n\n\n\n<p>In machine learning, propensity models are used to predict the likelihood or probability that a specific event will occur. For instance, in marketing, a propensity model can predict the likelihood that a customer will purchase a product or respond to a marketing campaign. These models are built using data from previous interactions, customer behavior, demographics, and other relevant factors. The goal is to understand how different features of the data contribute to the likelihood of an event.<\/p>\n\n\n\n<p>For example, in an e-commerce setting, a propensity model might predict the likelihood that a user will purchase a specific item based on their browsing history, search queries, and past purchases. This allows businesses to target their marketing strategies more effectively, improving conversion rates and customer satisfaction.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Key Applications of Propensity in Machine Learning<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Customer Churn Prediction<\/strong>: One of the most important uses of propensity in machine learning is predicting customer churn, which refers to the likelihood that a customer will stop using a service or product. By analyzing historical data, such as past customer interactions, purchases, usage patterns, and engagement, machine learning models can calculate the propensity of a customer to churn. By identifying customers with a high propensity to churn, businesses can take proactive steps to retain them, such as offering personalized promotions or targeted communication.<\/li>\n\n\n\n<li><strong>Targeted Marketing and Personalization<\/strong>: Propensity models are widely used in marketing to predict the likelihood of a customer engaging with a particular campaign, responding to a promotional offer, or making a purchase. 
For example, a retailer may use a propensity model to predict the likelihood of a customer responding to a discount offer for a particular product. This allows the retailer to focus their efforts on the customers with the highest propensity, improving the efficiency of their marketing strategies and increasing return on investment (ROI).<\/li>\n\n\n\n<li><strong>Recommendation Systems<\/strong>: Propensity also plays a crucial role in recommendation systems, where the goal is to predict the likelihood of a user engaging with a product, service, or content. For example, in a movie recommendation system, the model might predict the propensity of a user to enjoy and watch a particular movie based on their past preferences and ratings. By calculating these propensities, the system can recommend the most relevant items to users, improving user experience and engagement.<\/li>\n\n\n\n<li><strong>Fraud Detection<\/strong>: In financial services, propensity models are used to predict the likelihood that a transaction or activity is fraudulent. By analyzing transaction patterns, user behavior, and historical data, machine learning models can estimate the propensity of a given action to be fraudulent. 
This helps in flagging high-risk transactions and protecting users from potential financial losses.<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">Techniques for Propensity Modeling<\/h3>\n\n\n\n<p>Several machine learning techniques are used for propensity modeling, with some of the most common methods being:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>Logistic Regression<\/strong>: A popular method for predicting binary outcomes, logistic regression models the probability of an event occurring by using a sigmoid function to output values between 0 and 1, which can be interpreted as the propensity of an event.<\/li>\n\n\n\n<li><strong>Decision Trees and Random Forests<\/strong>: Decision trees can be used to segment the data based on different features, and each leaf node represents a predicted propensity for an event. Random forests combine multiple decision trees to improve prediction accuracy.<\/li>\n\n\n\n<li><strong>Gradient Boosting<\/strong>: Gradient boosting methods such as XGBoost or LightGBM can be used to build more accurate propensity models by combining weak learners to form a strong predictive model.<\/li>\n\n\n\n<li><strong>Neural Networks<\/strong>: Deep learning techniques, especially in the form of neural networks, can be used for complex propensity modeling, especially when dealing with large datasets with intricate relationships between features.<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">Evaluation of Propensity Models<\/h3>\n\n\n\n<p>To assess the effectiveness of propensity models, various performance metrics are used. 
These include:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>Accuracy<\/strong>: Measures how often the model&#8217;s predictions are correct.<\/li>\n\n\n\n<li><strong>AUC-ROC (Area Under the Receiver Operating Characteristic Curve)<\/strong>: This metric evaluates the tradeoff between true positive rate and false positive rate, providing a comprehensive view of the model&#8217;s ability to distinguish between different classes (such as &#8216;purchase&#8217; vs &#8216;no purchase&#8217;).<\/li>\n\n\n\n<li><strong>Precision and Recall<\/strong>: Precision measures how many of the predicted positive cases are actually positive, while recall measures how many of the actual positive cases are identified by the model. These metrics are especially useful in situations where false positives or false negatives have different implications (e.g., in fraud detection or medical diagnosis).<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">Conclusion<\/h3>\n\n\n\n<p>In machine learning, propensity is a fundamental concept that helps in predicting the likelihood of specific events based on data patterns. Propensity models enable businesses to make informed decisions, such as targeting the right customers for marketing campaigns, predicting customer churn, or recommending personalized content. By leveraging techniques like logistic regression, decision trees, and gradient boosting, machine learning models can calculate propensities that enhance decision-making and improve outcomes. 
As machine learning continues to evolve, the importance of propensity modeling will continue to grow, especially in areas like customer behavior prediction, fraud detection, and personalized recommendations.<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"1159\" height=\"1600\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/imgroy2.jpeg\" alt=\"\" class=\"wp-image-1941\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/imgroy2.jpeg 1159w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/imgroy2-217x300.jpeg 217w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/imgroy2-742x1024.jpeg 742w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/imgroy2-768x1060.jpeg 768w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/imgroy2-1113x1536.jpeg 1113w\" sizes=\"auto, (max-width: 1159px) 100vw, 1159px\" \/><\/figure>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/PDF_PROPENSITY-ROYSS.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of PDF_PROPENSITY-ROYSS.\"><\/object><a id=\"wp-block-file--media-74f7480b-0eb9-4f34-8d25-4e77cdd0108f\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/PDF_PROPENSITY-ROYSS.pdf\">PDF_PROPENSITY-ROYSS<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/PDF_PROPENSITY-ROYSS.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-74f7480b-0eb9-4f34-8d25-4e77cdd0108f\">Download<\/a><\/div>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\u270f\ufe0f <strong>Practice Exercise: Predicting Subscription 
Propensity<\/strong><\/h3>\n\n\n\n<p><strong>Scenario:<\/strong><br>A digital magazine wants to predict whether a visitor will <strong>subscribe<\/strong> based on their recent engagement behavior.<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\ud83d\udcca <strong>Features Used:<\/strong><\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>x\u2081<\/strong>: Number of articles read in the past week<\/li>\n\n\n\n<li><strong>x\u2082<\/strong>: Time spent on the site (in minutes) in the past month<\/li>\n\n\n\n<li><strong>y<\/strong>: Subscription (1 = Yes, 0 = No)<\/li>\n<\/ul>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>Visitor<\/th><th>x\u2081 (Articles Read)<\/th><th>x\u2082 (Time in Minutes)<\/th><th>y (Subscribed)<\/th><\/tr><\/thead><tbody><tr><td>1<\/td><td>6<\/td><td>180<\/td><td>1<\/td><\/tr><tr><td>2<\/td><td>3<\/td><td>60<\/td><td>0<\/td><\/tr><tr><td>3<\/td><td>8<\/td><td>240<\/td><td>1<\/td><\/tr><tr><td>4<\/td><td>2<\/td><td>30<\/td><td>0<\/td><\/tr><tr><td>5<\/td><td>5<\/td><td>120<\/td><td>1<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\ud83d\udd27 <strong>Trained Logistic Regression Model:<\/strong><\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Intercept (b\u2080) = -2.5<\/li>\n\n\n\n<li>Coefficient for x\u2081 (b\u2081) = 0.4<\/li>\n\n\n\n<li>Coefficient for x\u2082 (b\u2082) = 0.01<\/li>\n<\/ul>\n\n\n\n<p><\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\ud83d\udd0d <strong>Your Task:<\/strong><\/h3>\n\n\n\n<p><strong>Predict the subscription probability<\/strong> for a new visitor who:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Read 4 articles (x\u2081 = 4)<\/li>\n\n\n\n<li>Spent 100 minutes (x\u2082 = 100)<\/li>\n<\/ul>\n\n\n\n<figure class=\"wp-block-image 
size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"630\" height=\"115\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/log1.jpg\" alt=\"\" class=\"wp-image-1948\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/log1.jpg 630w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/log1-300x55.jpg 300w\" sizes=\"auto, (max-width: 630px) 100vw, 630px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"597\" height=\"179\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/log2.jpg\" alt=\"\" class=\"wp-image-1949\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/log2.jpg 597w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/07\/log2-300x90.jpg 300w\" sizes=\"auto, (max-width: 597px) 100vw, 597px\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\u2705 <strong>Result:<\/strong><\/h3>\n\n\n\n<p>The predicted <strong>subscription propensity<\/strong> is <strong>52.5%<\/strong>.<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\"><strong>Clustering<\/strong><\/h2>\n\n\n\n<p id=\"cluster\"><strong>Clustering<\/strong> is a fundamental technique in the field of data analysis and machine learning that involves grouping a set of objects or data points into clusters, or subsets, such that objects within the same cluster are more similar to each other than to those in other clusters. The goal of clustering is to discover inherent structures or patterns in the data without any prior knowledge of class labels or predefined categories. 
It is an unsupervised learning method, meaning that the algorithm does not rely on labeled data but instead identifies relationships and similarities based on the input features. <\/p>\n\n\n\n<p id=\"-\">Clustering plays a crucial role in many <strong>real-world applications, including market segmentation, image recognition, anomaly detection, and social network analysis.<\/strong> For example, businesses can use clustering to identify distinct customer segments, allowing them to tailor marketing strategies more effectively. In image recognition, clustering can help group similar images together based on visual features. Similarly, in social networks, clustering algorithms can identify communities of users with similar interests.<\/p>\n\n\n\n<p>The process of clustering typically starts with the selection of a suitable similarity or distance measure, such as <strong><a href=\"https:\/\/medium.com\/analytics-vidhya\/euclidean-and-manhattan-distance-metrics-in-machine-learning-a5942a8c9f2f\">Euclidean distance, Manhattan distance<\/a><\/strong>, or cosine similarity, to quantify the closeness between data points. The choice of distance metric depends on the nature of the data and the problem at hand. Once a similarity measure is chosen, various clustering algorithms can be applied to partition the data into clusters. These algorithms vary in their approach and assumptions, but all share the common goal of grouping similar data points.<\/p>\n\n\n\n<p>Several well-known clustering algorithms exist, each with its strengths and weaknesses. <strong>K-means<\/strong> is one of the most popular and widely used clustering algorithms. It works by partitioning the data into K clusters, where K is a user-defined parameter, and iteratively refines the cluster centroids until convergence. 
K-means is efficient and scalable, making it suitable for large datasets, but it has limitations, such as its sensitivity to the initial choice of cluster centroids and its assumption that clusters are spherical in shape. Another well-known algorithm is hierarchical clustering, which builds a tree-like structure of nested clusters. Hierarchical clustering can be agglomerative, where clusters are progressively merged, or divisive, where clusters are split iteratively. This method does not require the number of clusters to be predefined and can provide a more intuitive understanding of the data&#8217;s structure. However, hierarchical clustering can be computationally expensive, especially for large datasets.<\/p>\n\n\n\n<p>Density-based clustering methods, such as<strong> DBSCAN (Density-Based Spatial Clustering of Applications with Noise)<\/strong>, are designed to find clusters of arbitrary shape and are particularly effective for datasets with noise and outliers. DBSCAN does not require the number of clusters to be specified in advance and instead relies on the density of points in a region to form clusters. This makes it suitable for scenarios where the number of clusters is not known beforehand or when clusters are not well-separated.<\/p>\n\n\n\n<p><strong>Clustering<\/strong> is an essential tool for exploratory data analysis, providing valuable insights into the inherent structure of data. By grouping similar objects together, it can reveal patterns that may not be immediately apparent, helping researchers, analysts, and practitioners make more informed decisions. However, clustering is not without its challenges, such as choosing the appropriate algorithm, determining the optimal number of clusters, and interpreting the results. 
Despite these challenges, clustering remains an invaluable technique in <strong>data-driven fields.<\/strong><\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"512\" height=\"256\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/512px-The_effect_of_z-score_normalization_on_k-means_clustering.svg_.png\" alt=\"\" class=\"wp-image-1359\" style=\"width:540px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/512px-The_effect_of_z-score_normalization_on_k-means_clustering.svg_.png 512w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/512px-The_effect_of_z-score_normalization_on_k-means_clustering.svg_-300x150.png 300w\" sizes=\"auto, (max-width: 512px) 100vw, 512px\" \/><\/figure>\n\n\n\n<p>Clustering is an essential technique in data analysis and machine learning, which groups data points or objects into distinct clusters based on similarity. This unsupervised learning method enables the discovery of hidden patterns or structures in the data without the need for labeled examples. The goal is to identify natural groupings within the dataset so that items within the same group share certain characteristics while being dissimilar to items in other groups. Let\u2019s explore clustering with detailed examples across various domains.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">1. <strong>Market Segmentation in Business<\/strong><\/h3>\n\n\n\n<p>A practical example of clustering can be found in market segmentation. Imagine a retail company that wants to identify different groups of customers based on their purchasing behaviors. 
The company gathers data on customer demographics, buying history, and preferences.<\/p>\n\n\n\n<p><strong>Process:<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Data points: Each customer is represented by a vector of features (age, income, frequency of purchase, product preferences, etc.).<\/li>\n\n\n\n<li>Clustering algorithm: K-means or DBSCAN could be used to group customers based on their purchasing patterns.<\/li>\n\n\n\n<li>Outcome: The algorithm might reveal distinct groups of customers, such as:\n<ul class=\"wp-block-list\">\n<li>Group 1: Young professionals who frequently purchase technology and gadgets.<\/li>\n\n\n\n<li>Group 2: Middle-aged parents who buy household items and children\u2019s products.<\/li>\n\n\n\n<li>Group 3: Retired individuals who tend to buy health-related products.<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<p><strong>Benefit:<\/strong> This segmentation allows the business to target each group with tailored marketing strategies, such as promotions on tech gadgets for Group 1 or discounts on health-related items for Group 3. It helps optimize resources and improve customer satisfaction.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">2. <strong>Image Recognition in Computer Vision<\/strong><\/h3>\n\n\n\n<p>Clustering can be used to group similar images for classification or recognition tasks. 
Suppose a company wants to organize a large collection of photos, say from a social media platform, into categories such as &#8220;landscapes,&#8221; &#8220;portraits,&#8221; and &#8220;animals.&#8221;<\/p>\n\n\n\n<p><strong>Process:<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Data points: Each image is represented by a vector of features, such as color histograms, edge detection patterns, or deep learning features extracted from pre-trained neural networks.<\/li>\n\n\n\n<li>Clustering algorithm: K-means or hierarchical clustering could be used to group similar images based on these features.<\/li>\n\n\n\n<li>Outcome: The algorithm might categorize the images into:\n<ul class=\"wp-block-list\">\n<li>Cluster 1: Images of natural landscapes, such as mountains, forests, and beaches.<\/li>\n\n\n\n<li>Cluster 2: Portraits or selfies with faces.<\/li>\n\n\n\n<li>Cluster 3: Animals, including cats, dogs, and wildlife.<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<p><strong>Benefit:<\/strong> Clustering simplifies organizing vast image datasets, making it easier for users to browse and search through photos. It could also support automated tagging or categorization of new images as they are uploaded.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">3. <strong>Anomaly Detection in Fraud Detection<\/strong><\/h3>\n\n\n\n<p>Clustering can be helpful in identifying unusual patterns or outliers within data, which is particularly useful in fraud detection. 
For instance, a bank may want to detect fraudulent transactions based on customer behavior.<\/p>\n\n\n\n<p><strong>Process:<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Data points: Each transaction is represented by features such as transaction amount, frequency, location, and time.<\/li>\n\n\n\n<li>Clustering algorithm: DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is effective in this case, as it can find clusters of normal transactions while identifying transactions that do not belong to any cluster (outliers).<\/li>\n\n\n\n<li>Outcome: Most transactions fall into clusters representing regular customer behavior. However, some transactions, such as a large withdrawal from a foreign country, will not belong to any cluster, signaling potentially fraudulent activity.<\/li>\n<\/ul>\n\n\n\n<p><strong>Benefit:<\/strong> By automatically detecting transactions that deviate from normal patterns, banks can flag suspicious activities for further investigation, preventing fraud and ensuring security.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">4. <strong>Social Network Analysis<\/strong><\/h3>\n\n\n\n<p>In social networks, clustering algorithms can help detect communities of users who share common interests or social connections. 
For example, an online platform like Twitter might want to identify groups of users with similar interests to recommend relevant content.<\/p>\n\n\n\n<p><strong>Process:<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Data points: Each user is represented by a set of features such as the number of followers, topics of posts, and interactions with others.<\/li>\n\n\n\n<li>Clustering algorithm: Spectral clustering or community detection algorithms (like the Louvain method) can be applied to find communities.<\/li>\n\n\n\n<li>Outcome: The algorithm might identify several communities of users, such as:\n<ul class=\"wp-block-list\">\n<li>Community 1: Sports enthusiasts discussing football and basketball.<\/li>\n\n\n\n<li>Community 2: Food lovers sharing recipes and restaurant reviews.<\/li>\n\n\n\n<li>Community 3: Environmental activists focusing on climate change and sustainability.<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<p><strong>Benefit:<\/strong> This clustering helps the platform recommend relevant posts or users to follow, improving user engagement and experience.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">5. <strong>Genetic Data Analysis in Biology<\/strong><\/h3>\n\n\n\n<p>Clustering can be used in biology to group genes or species with similar genetic traits. 
For example, researchers might want to identify clusters of genes that are involved in the same biological pathways.<\/p>\n\n\n\n<p><strong>Process:<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Data points: Each gene is represented by a vector of expression levels across various conditions or tissues.<\/li>\n\n\n\n<li>Clustering algorithm: Hierarchical clustering or K-means could be used to identify genes with similar expression patterns.<\/li>\n\n\n\n<li>Outcome: The algorithm might reveal groups of genes, such as:\n<ul class=\"wp-block-list\">\n<li>Cluster 1: Genes highly expressed during the immune response.<\/li>\n\n\n\n<li>Cluster 2: Genes associated with cell growth and division.<\/li>\n\n\n\n<li>Cluster 3: Genes that are more active during stress conditions.<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<p><strong>Benefit:<\/strong> Clustering genes based on their expression patterns can help researchers understand biological processes and identify potential targets for drug development.<\/p>\n\n\n\n<p>Clustering is a versatile and powerful tool used across many fields for uncovering hidden patterns, categorizing data, and making sense of complex datasets. Whether for <strong>market segmentation, image recognition, fraud detection, social network analysis, or genetic research<\/strong>, clustering provides valuable insights that can drive decision-making and improve outcomes. Despite its challenges, such as determining the right number of clusters and handling noisy data, clustering remains one of the most important methods in data science and machine learning.<\/p>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\"><strong>k-means clustering<\/strong><\/h2>\n\n\n\n<p>K-Means clustering is one of the most popular and widely used unsupervised machine learning algorithms, designed to partition a set of data points into a predefined number of clusters, K. 
The primary goal of K-Means is to group data points in such a way that data points within the same cluster are more similar to each other than to those in other clusters. This is achieved through a process of iterative refinement, where the algorithm repeatedly assigns data points to clusters and then updates the cluster centroids until the clustering stabilizes.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">How K-Means Clustering Works<\/h3>\n\n\n\n<p>K-Means works in the following steps:<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Initialization<\/strong>: The algorithm begins by selecting K initial centroids (one for each cluster). These centroids can either be chosen randomly from the data points or selected using more sophisticated methods like the K-Means++ algorithm, which aims to spread out the initial centroids to improve the clustering process.<\/li>\n\n\n\n<li><strong>Assignment Step<\/strong>: Each data point is assigned to the nearest centroid. The &#8220;nearest&#8221; centroid is determined based on a chosen distance metric, typically Euclidean distance, which measures the straight-line distance between a data point and the centroid.<\/li>\n\n\n\n<li><strong>Update Step<\/strong>: After all data points have been assigned to clusters, the centroids are recalculated. The new centroid of each cluster is the mean (average) of all the points assigned to that cluster. This means that the new centroid is the point that minimizes the sum of squared distances to all points within the cluster.<\/li>\n\n\n\n<li><strong>Repeat<\/strong>: The assignment and update steps are repeated iteratively. In each iteration, data points may be reassigned to different clusters, and centroids are updated accordingly. 
This continues until the assignments no longer change, or until a predefined number of iterations is reached, signaling convergence.<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">Example of K-Means Clustering<\/h3>\n\n\n\n<p>Consider a simple example where we have a dataset of customer purchasing behavior, with two features: income and spending score. We want to group the customers into K=3 clusters based on these features.<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Initialization<\/strong>: Suppose K=3, so we randomly select three initial centroids from the data points.<\/li>\n\n\n\n<li><strong>Assignment<\/strong>: Each customer is assigned to the closest centroid based on their income and spending score.<\/li>\n\n\n\n<li><strong>Update<\/strong>: After assigning customers to the clusters, we calculate the mean income and spending score for each group and update the centroids.<\/li>\n\n\n\n<li><strong>Repeat<\/strong>: The steps are repeated until the customer assignments no longer change, and the final clusters represent groups of customers with similar spending patterns.<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">Key Characteristics of K-Means<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>Efficiency<\/strong>: K-Means is computationally efficient and scales well with large datasets. Its time complexity is typically O(n * K * I), where n is the number of data points, K is the number of clusters, and I is the number of iterations. This makes it faster than many other clustering algorithms, especially when working with large datasets.<\/li>\n\n\n\n<li><strong>Convergence<\/strong>: K-Means converges to a local optimum solution, meaning that it will find a solution that is optimal in terms of minimizing the sum of squared distances within each cluster, but it is not guaranteed to find the global optimum. 
The outcome can depend on the initial selection of centroids.<\/li>\n\n\n\n<li><strong>Assumptions<\/strong>: K-Means assumes that clusters are spherical and of roughly equal size. This means that it may not perform well when clusters are of different shapes or densities.<\/li>\n\n\n\n<li><strong>Sensitivity to Initialization<\/strong>: The performance of K-Means can be heavily influenced by the initial selection of centroids. Poor initialization can lead to suboptimal clustering. This is why methods like K-Means++ were introduced to improve the selection of initial centroids and reduce the chance of poor results.<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">Strengths and Limitations of K-Means<\/h3>\n\n\n\n<p><strong>Strengths<\/strong>:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Simple and easy to understand.<\/li>\n\n\n\n<li>Fast and scalable, making it suitable for large datasets.<\/li>\n\n\n\n<li>Versatile and can be used in a variety of domains, from market segmentation to image compression.<\/li>\n<\/ul>\n\n\n\n<p><strong>Limitations<\/strong>:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>The number of clusters (K) must be specified in advance, which can be challenging without prior knowledge of the data.<\/li>\n\n\n\n<li>It can be sensitive to outliers, which may distort the final clusters.<\/li>\n\n\n\n<li>The assumption of spherical clusters means K-Means may not perform well on datasets with non-spherical or highly imbalanced clusters.<\/li>\n\n\n\n<li>It may converge to a local minimum, not necessarily the global optimal solution.<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">Applications of K-Means<\/h3>\n\n\n\n<p>K-Means is used in a wide variety of real-world applications, including:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>Customer Segmentation<\/strong>: Grouping customers based on purchasing behavior to tailor marketing strategies.<\/li>\n\n\n\n<li><strong>Image Compression<\/strong>: Reducing the size of image files 
by clustering similar pixel values and representing them with the average value.<\/li>\n\n\n\n<li><strong>Document Clustering<\/strong>: Grouping documents with similar content in information retrieval systems.<\/li>\n\n\n\n<li><strong>Anomaly Detection<\/strong>: Identifying unusual patterns by clustering normal data points and flagging outliers.<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">Conclusion<\/h3>\n\n\n\n<p><strong>K-Means<\/strong> clustering is a powerful and efficient algorithm for grouping data points into clusters based on similarity. Despite its simplicity, it is widely used in various fields due to its scalability, ease of use, and effectiveness in uncovering patterns in large datasets. However, it is important to consider its limitations, such as sensitivity to initialization and the need for a predefined number of clusters. Despite these challenges, K-Means remains one of the most popular clustering algorithms in data analysis and machine learning.<\/p>\n\n\n\n<p><strong><a href=\"https:\/\/medium.com\/@karna.sujan52\/k-means-algorithm-solved-numerical-3c94d25076e8\">Exercise-1<\/a><\/strong><\/p>\n\n\n\n<p>Implement the K-Means algorithm with K = 2 on the data points (185, 72), (170, 56), (168, 60), (179, 68), (182, 72), and (188, 77) for two iterations, and display the resulting clusters. 
Initially, select the first two data points as the initial centroids.<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"598\" height=\"301\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/table-1.jpg\" alt=\"\" class=\"wp-image-1375\" style=\"width:746px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/table-1.jpg 598w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/table-1-300x151.jpg 300w\" sizes=\"auto, (max-width: 598px) 100vw, 598px\" \/><\/figure>\n\n\n\n<p>In the <strong>initial step, <\/strong>we determine the similarity between data points using the Euclidean distance metric.<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"822\" height=\"753\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3_first.webp\" alt=\"\" class=\"wp-image-1376\" style=\"width:695px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3_first.webp 822w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3_first-300x275.webp 300w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3_first-768x704.webp 768w\" sizes=\"auto, (max-width: 822px) 100vw, 822px\" \/><\/figure>\n\n\n\n<p>In tabular form, it can be represented as,<\/p>\n\n\n\n<figure class=\"wp-block-image aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"544\" height=\"173\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3_2nd.webp\" alt=\"\" class=\"wp-image-1377\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3_2nd.webp 544w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/3_2nd-300x95.webp 300w\" sizes=\"auto, (max-width: 544px) 100vw, 544px\" \/><\/figure>\n\n\n\n<p>The result after the first iteration. 
<\/p>\n\n\n\n<figure class=\"wp-block-image aligncenter size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"608\" height=\"313\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/4th.png\" alt=\"\" class=\"wp-image-1378\" style=\"width:583px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/4th.png 608w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/4th-300x154.png 300w\" sizes=\"auto, (max-width: 608px) 100vw, 608px\" \/><\/figure>\n\n\n\n<p>In the second iteration, calculating centroids again, <\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"200\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/5th-1024x200.webp\" alt=\"\" class=\"wp-image-1379\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/5th-1024x200.webp 1024w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/5th-300x59.webp 300w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/5th-768x150.webp 768w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/5th.webp 1047w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<p>Calculating distances again,<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"659\" height=\"807\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/6th.webp\" alt=\"\" class=\"wp-image-1380\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/6th.webp 659w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/6th-245x300.webp 245w\" sizes=\"auto, (max-width: 659px) 100vw, 659px\" \/><\/figure>\n\n\n\n<p>In tabular form,<\/p>\n\n\n\n<figure class=\"wp-block-image aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"632\" height=\"174\" 
src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/7th.jpg\" alt=\"\" class=\"wp-image-1381\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/7th.jpg 632w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/7th-300x83.jpg 300w\" sizes=\"auto, (max-width: 632px) 100vw, 632px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"590\" height=\"260\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/8TH.png\" alt=\"\" class=\"wp-image-1382\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/8TH.png 590w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/8TH-300x132.png 300w\" sizes=\"auto, (max-width: 590px) 100vw, 590px\" \/><\/figure>\n\n\n\n<p>As two iterations have already been completed as required by the problem, the numerical process concludes here. Since the clustering remains unchanged after the second iteration, the process will be terminated, even if the question does not explicitly state to do so.<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code><strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-ast-global-color-1-color\">#Python code for K means clustering<\/mark><\/strong>\nimport pandas as pd\nfrom sklearn.cluster import KMeans\n\n# Sample data from the image\ndata = {'Instance': &#91;1, 2, 3, 4, 5, 6],\n        'X': &#91;185, 170, 168, 179, 182, 188],\n        'Y': &#91;72, 56, 60, 68, 72, 77]}\n\ndf = pd.DataFrame(data)\n\n# Select features for clustering\nX = df&#91;&#91;'X', 'Y']]\n\n# Choose the number of clusters (K)\nnum_clusters = 2  # You can adjust this value\n\n# Create a KMeans object\nkmeans = KMeans(n_clusters=num_clusters, random_state=42) \n\n# Fit the model to the data\nkmeans.fit(X)\n\n# Get cluster labels for each data point\nlabels = kmeans.labels_\n\n# Adjust cluster labels to start from 1\nadjusted_labels = labels + 1 
\n\n# Add adjusted cluster labels to the DataFrame\ndf&#91;'Cluster'] = adjusted_labels\n\n# Print the clustered data\nprint(df)\n\nOutput:\n   Instance     X     Y  Cluster\n0         1  185.0  72.0       1\n1         2  170.0  56.0       2\n2         3  168.0  60.0       2\n3         4  179.0  68.0       1\n4         5  182.0  72.0       1\n5         6  188.0  77.0       1<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\"><strong>Collaborative Filtering<\/strong><\/h2>\n\n\n\n<h6 class=\"wp-block-heading\">Introduction to Collaborative Filtering and Recommendation Systems<\/h6>\n\n\n\n<p id=\"col\">Recommendation systems are algorithms used to suggest items to users based on various criteria, such as past behavior, preferences, and the behavior of similar users. Collaborative filtering is one of the most widely used techniques for building recommendation systems. It works by making recommendations based on the preferences or ratings of other users who are similar to the target user.<\/p>\n\n\n\n<p>There are generally <strong>three main types of recommendation systems<\/strong>:<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Collaborative Filtering<\/strong>: Uses the past behaviors and ratings of users to recommend items.\n<ul class=\"wp-block-list\">\n<li><strong>User-based Collaborative Filtering<\/strong>: Recommends items based on similarities between users.<\/li>\n\n\n\n<li><strong>Item-based Collaborative Filtering<\/strong>: Recommends items that are similar to items the user has liked before.<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>Content-Based Filtering<\/strong>: Recommends items based on the attributes of the items and the user\u2019s past interactions with those attributes.<\/li>\n\n\n\n<li><strong>Hybrid Models<\/strong>: Combines both collaborative and content-based approaches to provide better recommendations.<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">Required Formulas<\/h3>\n\n\n\n<p>In collaborative filtering, 
especially for user-based collaborative filtering, we typically use the <strong>Pearson correlation coefficient<\/strong> to calculate the similarity between users. The following formulas are required:<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"731\" height=\"390\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/for1.jpg\" alt=\"\" class=\"wp-image-1584\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/for1.jpg 731w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/01\/for1-300x160.jpg 300w\" sizes=\"auto, (max-width: 731px) 100vw, 731px\" \/><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">1. Data Preparation<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">Original Rating Matrix:<\/h4>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>User<\/th><th>Inshorts (I1)<\/th><th>HT (I2)<\/th><th>NYT (I3)<\/th><th>TOI (I4)<\/th><th>BBC (I5)<\/th><\/tr><\/thead><tbody><tr><td>Alice<\/td><td>5<\/td><td>4<\/td><td>1<\/td><td>4<\/td><td>?<\/td><\/tr><tr><td>U1<\/td><td>3<\/td><td>1<\/td><td>2<\/td><td>3<\/td><td>3<\/td><\/tr><tr><td>U2<\/td><td>4<\/td><td>3<\/td><td>4<\/td><td>3<\/td><td>5<\/td><\/tr><tr><td>U3<\/td><td>3<\/td><td>3<\/td><td>1<\/td><td>5<\/td><td>4<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">2. 
Calculate Average Ratings for Each User<\/h3>\n\n\n\n<p>Formula:<br><strong>Average Rating<\/strong> = (Sum of Ratings for all rated items) \/ (Number of rated items)<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>User<\/th><th>Inshorts (I1)<\/th><th>HT (I2)<\/th><th>NYT (I3)<\/th><th>TOI (I4)<\/th><th>Average Rating<\/th><\/tr><\/thead><tbody><tr><td>Alice<\/td><td>5<\/td><td>4<\/td><td>1<\/td><td>4<\/td><td>3.5<\/td><\/tr><tr><td>U1<\/td><td>3<\/td><td>1<\/td><td>2<\/td><td>3<\/td><td>2.25<\/td><\/tr><tr><td>U2<\/td><td>4<\/td><td>3<\/td><td>4<\/td><td>3<\/td><td>3.5<\/td><\/tr><tr><td>U3<\/td><td>3<\/td><td>3<\/td><td>1<\/td><td>5<\/td><td>3<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p><strong>Average Ratings:<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Alice: (5 + 4 + 1 + 4) \/ 4 = <strong>3.5<\/strong><\/li>\n\n\n\n<li>U1: (3 + 1 + 2 + 3) \/ 4 = <strong>2.25<\/strong><\/li>\n\n\n\n<li>U2: (4 + 3 + 4 + 3) \/ 4 = <strong>3.5<\/strong><\/li>\n\n\n\n<li>U3: (3 + 3 + 1 + 5) \/ 4 = <strong>3<\/strong><\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">3. 
Center the Ratings (Subtract Average Rating from Each Rating)<\/h3>\n\n\n\n<p>Formula:<br><strong>Centered Rating<\/strong> = Original Rating &#8211; Average Rating<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>User<\/th><th>Inshorts (I1)<\/th><th>HT (I2)<\/th><th>NYT (I3)<\/th><th>TOI (I4)<\/th><th>BBC (I5)<\/th><\/tr><\/thead><tbody><tr><td>Alice<\/td><td>1.5<\/td><td>0.5<\/td><td>-2.5<\/td><td>0.5<\/td><td>?<\/td><\/tr><tr><td>U1<\/td><td>0.75<\/td><td>-1.25<\/td><td>-0.25<\/td><td>0.75<\/td><td>?<\/td><\/tr><tr><td>U2<\/td><td>0.5<\/td><td>-0.5<\/td><td>0.5<\/td><td>-0.5<\/td><td>?<\/td><\/tr><tr><td>U3<\/td><td>0<\/td><td>0<\/td><td>-2<\/td><td>2<\/td><td>?<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p><strong>Centered Ratings Calculation:<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Alice:\n<ul class=\"wp-block-list\">\n<li>Inshorts (I1) = 5 &#8211; 3.5 = <strong>1.5<\/strong><\/li>\n\n\n\n<li>HT (I2) = 4 &#8211; 3.5 = <strong>0.5<\/strong><\/li>\n\n\n\n<li>NYT (I3) = 1 &#8211; 3.5 = <strong>-2.5<\/strong><\/li>\n\n\n\n<li>TOI (I4) = 4 &#8211; 3.5 = <strong>0.5<\/strong><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>U1:\n<ul class=\"wp-block-list\">\n<li>Inshorts (I1) = 3 &#8211; 2.25 = <strong>0.75<\/strong><\/li>\n\n\n\n<li>HT (I2) = 1 &#8211; 2.25 = <strong>-1.25<\/strong><\/li>\n\n\n\n<li>NYT (I3) = 2 &#8211; 2.25 = <strong>-0.25<\/strong><\/li>\n\n\n\n<li>TOI (I4) = 3 &#8211; 2.25 = <strong>0.75<\/strong><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>U2:\n<ul class=\"wp-block-list\">\n<li>Inshorts (I1) = 4 &#8211; 3.5 = <strong>0.5<\/strong><\/li>\n\n\n\n<li>HT (I2) = 3 &#8211; 3.5 = <strong>-0.5<\/strong><\/li>\n\n\n\n<li>NYT (I3) = 4 &#8211; 3.5 = <strong>0.5<\/strong><\/li>\n\n\n\n<li>TOI (I4) = 3 &#8211; 3.5 = <strong>-0.5<\/strong><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>U3:\n<ul class=\"wp-block-list\">\n<li>Inshorts (I1) = 3 &#8211; 3 = <strong>0<\/strong><\/li>\n\n\n\n<li>HT (I2) = 3 &#8211; 3 = 
<strong>0<\/strong><\/li>\n\n\n\n<li>NYT (I3) = 1 &#8211; 3 = <strong>-2<\/strong><\/li>\n\n\n\n<li>TOI (I4) = 5 &#8211; 3 = <strong>2<\/strong><\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"695\" height=\"301\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image.png\" alt=\"\" class=\"wp-image-2109\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image.png 695w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-300x130.png 300w\" sizes=\"auto, (max-width: 695px) 100vw, 695px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"707\" height=\"266\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-1.png\" alt=\"\" class=\"wp-image-2110\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-1.png 707w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-1-300x113.png 300w\" sizes=\"auto, (max-width: 707px) 100vw, 707px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"723\" height=\"366\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-2.png\" alt=\"\" class=\"wp-image-2111\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-2.png 723w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-2-300x152.png 300w\" sizes=\"auto, (max-width: 723px) 100vw, 723px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"709\" height=\"379\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-3.png\" alt=\"\" class=\"wp-image-2112\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-3.png 709w, 
https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-3-300x160.png 300w\" sizes=\"auto, (max-width: 709px) 100vw, 709px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"699\" height=\"388\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-4.png\" alt=\"\" class=\"wp-image-2113\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-4.png 699w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-4-300x167.png 300w\" sizes=\"auto, (max-width: 699px) 100vw, 699px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"670\" height=\"383\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-5.png\" alt=\"\" class=\"wp-image-2114\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-5.png 670w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-5-300x171.png 300w\" sizes=\"auto, (max-width: 670px) 100vw, 670px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"677\" height=\"293\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-6.png\" alt=\"\" class=\"wp-image-2115\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-6.png 677w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-6-300x130.png 300w\" sizes=\"auto, (max-width: 677px) 100vw, 677px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"589\" height=\"62\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-7.png\" alt=\"\" class=\"wp-image-2116\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-7.png 589w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-7-300x32.png 300w\" 
sizes=\"auto, (max-width: 589px) 100vw, 589px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"699\" height=\"432\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-8.png\" alt=\"\" class=\"wp-image-2117\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-8.png 699w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/image-8-300x185.png 300w\" sizes=\"auto, (max-width: 699px) 100vw, 699px\" \/><\/figure>\n\n\n\n<p>Alice&#8217;s predicted rating for BBC News (I5) is approximately <strong>5.04<\/strong>.<\/p>\n\n\n\n<p>This prediction is based on the weighted average of Alice\u2019s similarities with other users and their respective ratings for BBC News.<\/p>\n\n\n\n<p>Here is the <strong><mark style=\"background-color:rgba(0, 0, 0, 0);color:#dd2828\" class=\"has-inline-color\">Python code to solve the collaborative filtering problem you provided. This code will:<\/mark><\/strong><\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Calculate the average ratings for each user.<\/li>\n\n\n\n<li>Center the ratings.<\/li>\n\n\n\n<li>Calculate the Pearson similarity between users.<\/li>\n\n\n\n<li>Predict Alice&#8217;s rating for BBC News (I5).<\/li>\n<\/ol>\n\n\n\n<pre class=\"wp-block-code\"><code>import numpy as np\n\n# Original Rating Matrix\nratings = {\n    'Alice': &#91;5, 4, 1, 4, None],\n    'U1': &#91;3, 1, 2, 3, 3],\n    'U2': &#91;4, 3, 4, 3, 5],\n    'U3': &#91;3, 3, 1, 5, 4]\n}\n\n# Calculate Average Rating for each user\ndef calculate_average_rating(ratings):\n    average_ratings = {}\n    for user, user_ratings in ratings.items():\n        # Calculate average of non-None ratings\n        valid_ratings = &#91;rating for rating in user_ratings if rating is not None]\n        average_ratings&#91;user] = sum(valid_ratings) \/ len(valid_ratings)\n    return average_ratings\n\n# Center the ratings by subtracting the average rating\ndef 
center_ratings(ratings, average_ratings):\n    centered_ratings = {}\n    for user, user_ratings in ratings.items():\n        centered_ratings&#91;user] = &#91;rating - average_ratings&#91;user] if rating is not None else None for rating in user_ratings]\n    return centered_ratings\n\n# Calculate Pearson similarity between two users\ndef pearson_similarity(user1, user2, centered_ratings):\n    common_ratings = &#91;]\n    for i in range(len(user1)):\n        if centered_ratings&#91;user1]&#91;i] is not None and centered_ratings&#91;user2]&#91;i] is not None:\n            common_ratings.append(i)\n    \n    if not common_ratings:\n        return 0\n    \n    numerator = sum(centered_ratings&#91;user1]&#91;i] * centered_ratings&#91;user2]&#91;i] for i in common_ratings)\n    denominator = np.sqrt(sum(centered_ratings&#91;user1]&#91;i] ** 2 for i in common_ratings)) * np.sqrt(sum(centered_ratings&#91;user2]&#91;i] ** 2 for i in common_ratings))\n    \n    return numerator \/ denominator if denominator != 0 else 0\n\n# Predict Alice's rating for BBC News (I5)\ndef predict_rating(user, target_item, ratings, centered_ratings, average_ratings):\n    similarities = &#91;]\n    weighted_ratings = &#91;]\n    \n    for other_user in ratings:\n        if other_user != user and ratings&#91;other_user]&#91;target_item] is not None:\n            similarity = pearson_similarity(user, other_user, centered_ratings)\n            similarities.append((other_user, similarity))\n            weighted_ratings.append(similarity * ratings&#91;other_user]&#91;target_item])\n    \n    if not similarities:\n        return average_ratings&#91;user]\n    \n    total_similarity = sum(abs(similarity) for _, similarity in similarities)\n    predicted_rating = average_ratings&#91;user] + sum(weighted_ratings) \/ total_similarity if total_similarity != 0 else average_ratings&#91;user]\n    \n    return predicted_rating\n\n# Main program to calculate everything\naverage_ratings = 
calculate_average_rating(ratings)\ncentered_ratings = center_ratings(ratings, average_ratings)\n\n# Predict Alice's rating for BBC News (I5), which is index 4\npredicted_rating = predict_rating('Alice', 4, ratings, centered_ratings, average_ratings)\n\n# Output\nprint(f\"Alice's predicted rating for BBC News (I5): {predicted_rating:.2f}\")\n\n<strong>Output:\nAlice's predicted rating for BBC News (I5): 5.04<\/strong>\n\n\/\/Explanation of Code:\ncalculate_average_rating: This function calculates the average rating for each user by summing all the ratings and dividing by the number of ratings (excluding None values).\n\ncenter_ratings: This function centers the ratings for each user by subtracting the user's average rating from each of their ratings.\n\npearson_similarity: This function calculates the Pearson similarity between two users based on their centered ratings.\n\npredict_rating: This function predicts the rating for a given user and item (in this case, Alice's rating for BBC News, I5). 
It uses Pearson similarity to compute a weighted average of ratings from similar users.<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Collaborative Filtering using Jaccard Similarity<\/strong><\/h2>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"476\" height=\"370\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Jacard_Similarity.png\" alt=\"\" class=\"wp-image-2048\" style=\"width:558px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Jacard_Similarity.png 476w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Jacard_Similarity-300x233.png 300w\" sizes=\"auto, (max-width: 476px) 100vw, 476px\" \/><\/figure>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/COLLABORATIVE_FILTERING_SSROY_2025.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of COLLABORATIVE_FILTERING_SSROY_2025.\"><\/object><a id=\"wp-block-file--media-d587253a-0908-4653-8007-2d15753d93d6\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/COLLABORATIVE_FILTERING_SSROY_2025.pdf\">COLLABORATIVE_FILTERING_SSROY_2025<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/COLLABORATIVE_FILTERING_SSROY_2025.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-d587253a-0908-4653-8007-2d15753d93d6\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Collaborative Filtering using Cosine Similarity<\/strong><\/h2>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"533\" height=\"378\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Cosine_Sim_Fig-1-1.png\" alt=\"\" 
class=\"wp-image-2051\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Cosine_Sim_Fig-1-1.png 533w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/Cosine_Sim_Fig-1-1-300x213.png 300w\" sizes=\"auto, (max-width: 533px) 100vw, 533px\" \/><\/figure>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/COLLABORATIVE_FILTERING_COSINE_SIMI_SSROY_2025.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of COLLABORATIVE_FILTERING_COSINE_SIMI_SSROY_2025.\"><\/object><a id=\"wp-block-file--media-daa86b75-d001-4b16-90f4-1b3f218de81c\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/COLLABORATIVE_FILTERING_COSINE_SIMI_SSROY_2025.pdf\">COLLABORATIVE_FILTERING_COSINE_SIMI_SSROY_2025<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/08\/COLLABORATIVE_FILTERING_COSINE_SIMI_SSROY_2025.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-daa86b75-d001-4b16-90f4-1b3f218de81c\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\" id=\"lin\"><strong>Linear Regression<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Linear-Regression_Estimations.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Linear-Regression_Estimations.\"><\/object><a id=\"wp-block-file--media-12da8ab5-8861-44b7-9ff3-1f0ae8b70cdd\" href=\"https:\/\/cyberenlightener.com\/?attachment_id=1635\">Linear-Regression_Estimations<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" id=\"metricsreg\" 
class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Final_Linear_Regression_AllErrors.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Final_Linear_Regression_AllErrors.\"><\/object><a id=\"wp-block-file--media-6e7f4108-4bde-49d4-a9d7-3be647fc2245\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Final_Linear_Regression_AllErrors.pdf\">Final_Linear_Regression_AllErrors<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Final_Linear_Regression_AllErrors.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-6e7f4108-4bde-49d4-a9d7-3be647fc2245\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"611\" height=\"324\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/image.png\" alt=\"\" class=\"wp-image-2309\" style=\"width:1191px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/image.png 611w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/image-300x159.png 300w\" sizes=\"auto, (max-width: 611px) 100vw, 611px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"597\" height=\"301\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/image-1.png\" alt=\"\" class=\"wp-image-2311\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/image-1.png 597w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/image-1-300x151.png 300w\" sizes=\"auto, (max-width: 597px) 100vw, 597px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div 
class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec 19 : Regression Analysis || Introduction to Machine Learning\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/drhrW0sb6MI?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec 20 : How to write a python code of Simple Linear Regression on Google Colab?| Intro to ML\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/2lOqNVA6Fmw?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec 21: Estimating \u03b20  and \u03b21 in Linear Regression\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/Sd2me8BFey8?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\" id=\"multiple\"><strong>Multiple Linear Regression<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object 
data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Multiple_Linear_Regression.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Multiple_Linear_Regression.\"><\/object><a id=\"wp-block-file--media-9e4e8f74-29f4-4cf9-a656-927891c745f8\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Multiple_Linear_Regression.pdf\">Multiple_Linear_Regression<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Multiple_Linear_Regression.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-9e4e8f74-29f4-4cf9-a656-927891c745f8\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Multiple-Linear-Regression-Example-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Multiple Linear Regression Example (1).\"><\/object><a id=\"wp-block-file--media-17eb889e-9f5c-485e-b988-0605116de8af\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Multiple-Linear-Regression-Example-1.pdf\">Multiple Linear Regression Example (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Multiple-Linear-Regression-Example-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-17eb889e-9f5c-485e-b988-0605116de8af\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\" id=\"Logistic\"><strong>Logistic Regression<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" 
data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Main_Prod-2.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Main_Prod (2).\"><\/object><a id=\"wp-block-file--media-64e2c55e-bd9c-48f3-ab49-f1c3c8cdbb4c\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Main_Prod-2.pdf\">Main_Prod (2)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Main_Prod-2.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-64e2c55e-bd9c-48f3-ab49-f1c3c8cdbb4c\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec22 : Logistic Regression || Introduction to Machine Learning\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/IYOIDUWzlu8?start=44&#038;feature=oembed\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\" id=\"Regression\"><strong>Regression tree<\/strong><\/h2>\n\n\n\n<p>A <strong><a href=\"https:\/\/www.ibm.com\/docs\/en\/db2-warehouse?topic=rt-background\">regression tree<\/a><\/strong> is a type of decision tree adapted for regression problems, where the target variable is continuous rather than categorical. While traditional decision trees\u2014like ID3, C4.5, and <a href=\"https:\/\/en.wikipedia.org\/wiki\/Decision_tree_learning\">CART<\/a>\u2014are primarily designed for classification tasks, regression trees use similar principles but apply different metrics to split the data. 
Instead of focusing on metrics like information gain, gain ratio, or the Gini index (used for classification), regression trees typically use metrics related to minimizing error, such as variance reduction or mean squared error (MSE).<\/p>\n\n\n\n<p>The most common algorithm for building regression trees is the CART (Classification and Regression Trees) algorithm. While CART works for classification tasks by using the Gini index, for regression problems, CART adapts by using the variance of the target variable as the criterion for splitting nodes.<\/p>\n\n\n\n<h5 class=\"wp-block-heading\">Step-by-Step Guide for Regression Trees with CART:<\/h5>\n\n\n\n<p>Let&#8217;s walk through a simple example using the same dataset used for classification in a prior experiment (golf playing decision). However, in this case, the target variable represents the number of golf players, which is a continuous numerical value rather than a categorical one (like true\/false in the original experiment).<\/p>\n\n\n\n<h5 class=\"wp-block-heading\">1. <strong>Data Understanding<\/strong><\/h5>\n\n\n\n<p>In the previous classification version of this dataset, the target column represented the decision to play golf (True\/False). Here, the target column represents the number of golf players, which is a real number. The features might be the same (e.g., weather conditions, time of day), but the key difference is that the target is continuous rather than categorical.<\/p>\n\n\n\n<h5 class=\"wp-block-heading\">2. <strong>Handling Continuous Target Variable<\/strong><\/h5>\n\n\n\n<p>Since the target is now a continuous variable, we cannot use traditional classification metrics like counting occurrences of &#8220;True&#8221; and &#8220;False.&#8221; Instead, we use the variance of the target variable as a way to decide how to split the data. The objective is to reduce the variance within each subset after a split.<\/p>\n\n\n\n<h5 class=\"wp-block-heading\">3. 
<strong>Splitting the Data (Using Variance)<\/strong><\/h5>\n\n\n\n<ul class=\"wp-block-list\">\n<li>For each potential split, we calculate the variance (or standard deviation) of the target variable in the two child nodes.<\/li>\n\n\n\n<li>The goal is to choose the split that results in the largest reduction in variance. The split with the least variance in each subset will indicate the most &#8220;homogeneous&#8221; groups with respect to the target variable.<\/li>\n<\/ul>\n\n\n\n<h5 class=\"wp-block-heading\">4. <strong>Recursive Process<\/strong><\/h5>\n\n\n\n<ul class=\"wp-block-list\">\n<li>The tree-building process proceeds recursively. After finding the best split based on variance reduction, the dataset is divided into two subsets. The same process is then applied to each of these subsets until the stopping criteria are met (e.g., a maximum tree depth or a minimum number of data points in a leaf node).<\/li>\n<\/ul>\n\n\n\n<h5 class=\"wp-block-heading\">5. <strong>Prediction with Regression Tree<\/strong><\/h5>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Once the tree is built, each leaf node will contain the predicted value for the target variable. For regression trees, the value predicted by a leaf node is typically the mean of the target variable for the instances in that leaf.<\/li>\n\n\n\n<li>New instances are passed down the tree, and the prediction is made by reaching a leaf and taking the average of the target values in that leaf.<\/li>\n<\/ul>\n\n\n\n<h5 class=\"wp-block-heading\">6. <strong>Pruning the Tree<\/strong><\/h5>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Just like classification trees, regression trees can be prone to overfitting. A tree that grows too deep may model noise in the data, leading to poor generalization. 
To counter this, pruning can be applied, which involves cutting back some branches of the tree to improve performance on unseen data.<\/li>\n<\/ul>\n\n\n\n<h5 class=\"wp-block-heading\">Example with the Golf Dataset:<\/h5>\n\n\n\n<p>In the golf dataset, where we have the number of golf players as the target variable, we might use weather conditions or time of day as the features. Since the target variable is a continuous value (e.g., the number of players), the algorithm would calculate the variance of the number of players within different subsets of the data based on these features. The tree would split the data based on the feature that most reduces this variance at each step.<\/p>\n\n\n\n<p>For instance, if the weather condition (e.g., sunny or rainy) significantly reduces the variance of the number of players, this would be chosen as a splitting criterion. The process continues recursively until the tree reaches a satisfactory depth or further splits no longer reduce variance significantly.<\/p>\n\n\n\n<p>Regression trees serve as powerful tools for handling continuous target variables, and the CART algorithm adapts this idea by focusing on minimizing variance within subsets rather than classifying instances into discrete classes. 
This enables decision trees to make classifications and predict continuous outcomes effectively.<\/p>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\" id=\"NonlinearReg\"><strong>Non-Linear Polynomial Regression<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Non-Linear-Regression_-Polynomial-Regression-1-2.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Non-Linear Regression_ Polynomial Regression (1) (2).\"><\/object><a id=\"wp-block-file--media-d27ff9c9-2beb-46d9-a7f9-d052c23de531\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Non-Linear-Regression_-Polynomial-Regression-1-2.pdf\">Non-Linear Regression_ Polynomial Regression (1) (2)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Non-Linear-Regression_-Polynomial-Regression-1-2.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-d27ff9c9-2beb-46d9-a7f9-d052c23de531\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Detail Calculations of the above steps 4,5 &amp; 6<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/Poly_Quadratic_Reg_SSR.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Poly_Quadratic_Reg_SSR.\"><\/object><a id=\"wp-block-file--media-925686b9-8b3d-4d03-b7b1-a61f1567fe00\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/Poly_Quadratic_Reg_SSR.pdf\">Poly_Quadratic_Reg_SSR<\/a><a 
href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/Poly_Quadratic_Reg_SSR.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-925686b9-8b3d-4d03-b7b1-a61f1567fe00\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/Kernel-matrix-computation-1-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Kernel matrix computation-1 (1).\"><\/object><a id=\"wp-block-file--media-33eda3d9-d9cf-4d9c-a1b5-403207bdbfda\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/Kernel-matrix-computation-1-1.pdf\">Kernel matrix computation-1 (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/Kernel-matrix-computation-1-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-33eda3d9-d9cf-4d9c-a1b5-403207bdbfda\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>What is Partial Least Squares (PLS) Regression?<\/strong><\/h2>\n\n\n\n<p><strong>PLS Regression<\/strong> is a statistical method that combines features of <strong>principal component analysis (PCA)<\/strong> and <strong>multiple regression<\/strong>.<\/p>\n\n\n\n<p>Its goal is to <strong>find latent components<\/strong> (linear combinations of predictors) that explain as much as possible of the covariance between predictors X and response Y.<\/p>\n\n\n\n<p>PLS is especially useful when:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Predictors are highly <strong>correlated<\/strong> (multicollinearity problem).<\/li>\n\n\n\n<li>Number of predictors &gt; number of observations (high-dimensional data).<\/li>\n<\/ul>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object 
data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/PLS-SSR.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of PLS-SSR.\"><\/object><a id=\"wp-block-file--media-96965eca-f39b-4aa4-bc4c-d68ea617e8dd\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/PLS-SSR.pdf\">PLS-SSR<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/10\/PLS-SSR.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-96965eca-f39b-4aa4-bc4c-d68ea617e8dd\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Decision-Tree-ID3-Example-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Decision-Tree-ID3-Example-1.\"><\/object><a id=\"wp-block-file--media-d47a4c79-52e5-40c2-8ac3-b4f14c61ccd9\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Decision-Tree-ID3-Example-1.pdf\">Decision-Tree-ID3-Example-1<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Decision-Tree-ID3-Example-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-d47a4c79-52e5-40c2-8ac3-b4f14c61ccd9\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Decision_Tree_CART_C4.5_SSROY_2025-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Decision_Tree_CART_C4.5_SSROY_2025-1.\"><\/object><a 
id=\"wp-block-file--media-e1ea4931-d531-48f3-a93a-ec6de4cf361c\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Decision_Tree_CART_C4.5_SSROY_2025-1.pdf\">Decision_Tree_CART_C4.5_SSROY_2025-1<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Decision_Tree_CART_C4.5_SSROY_2025-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-e1ea4931-d531-48f3-a93a-ec6de4cf361c\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><\/h2>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\"><strong>Classification-Non Linear<\/strong><\/h2>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Bayesian classification<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Roy_Naive-Bayes-tutorial_ssroy2025NAIVE-BAYSE.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Roy_Naive-Bayes-tutorial_ssroy2025NAIVE-BAYSE.\"><\/object><a id=\"wp-block-file--media-912d7d6d-9037-4cde-b9ac-6c10d3b2747b\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Roy_Naive-Bayes-tutorial_ssroy2025NAIVE-BAYSE.pdf\">Roy_Naive-Bayes-tutorial_ssroy2025NAIVE-BAYSE<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/Roy_Naive-Bayes-tutorial_ssroy2025NAIVE-BAYSE.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-912d7d6d-9037-4cde-b9ac-6c10d3b2747b\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Bayesian classification(Real Values)<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" 
data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/SSROY_Naive-Bayes-tutorial_REALVALUES1-1-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of SSROY_Naive-Bayes-tutorial_REALVALUES1-1-1.\"><\/object><a id=\"wp-block-file--media-751f844a-51a0-471f-bd13-76217f66b53d\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/SSROY_Naive-Bayes-tutorial_REALVALUES1-1-1.pdf\">SSROY_Naive-Bayes-tutorial_REALVALUES1-1-1<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/SSROY_Naive-Bayes-tutorial_REALVALUES1-1-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-751f844a-51a0-471f-bd13-76217f66b53d\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"NN-A\"><strong>Neural Network-Part A<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Part-A_NN.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Part-A_NN.\"><\/object><a id=\"wp-block-file--media-fdc069ef-6253-4f85-be35-bede09ef508d\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Part-A_NN.pdf\">Part-A_NN<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Part-A_NN.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-fdc069ef-6253-4f85-be35-bede09ef508d\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec23: Introduction to Neural Network ||Mimicking the biological Neural Net|| Intro2Machine Learning\" width=\"500\" height=\"281\" 
src=\"https:\/\/www.youtube.com\/embed\/v0GLbfH7Kfw?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec 24- Simple Neural Network and Pattern Classification\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/MCEW93hcBPY?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"NN-B\"><strong>Neural Network-Part B<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Part-B_NN.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Part-B_NN.\"><\/object><a id=\"wp-block-file--media-7364668c-e0bd-4bdf-b9d1-fb29aa89b3e1\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Part-B_NN.pdf\">Part-B_NN<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Part-B_NN.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-7364668c-e0bd-4bdf-b9d1-fb29aa89b3e1\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe 
loading=\"lazy\" title=\"Lec-25 : Linear Separability || simple neural network\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/qKWPdfPELxY?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec 26- Neural Network Exercises &amp; Solution || Introduction to Machine Learning\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/x7gupF_o69w?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec 27-Solving Neural Network Exercises-PART-2\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/8Rz-Nwop2ME?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Lec 28-Multilayer perceptron\" width=\"500\" height=\"281\" 
src=\"https:\/\/www.youtube.com\/embed\/JwHKMhlQVtM?list=PLNC-2IODvQCC-bdFbjyFuh_przj0d5o25\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>Backpropagation<\/strong> NN<\/h2>\n\n\n\n<p><strong>Backpropagation<\/strong> is a learning algorithm for neural networks that works in two steps: first, the error between the predicted output and the target is calculated; then, using the <strong>chain rule of calculus<\/strong>, this error is propagated backward through the network to compute gradients for each weight. These gradients are then used to update the weights and improve the model\u2019s accuracy.<\/p>\n\n\n\n<figure class=\"wp-block-image size-large is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"770\" height=\"1024\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/ROY-2-770x1024.jpeg\" alt=\"\" class=\"wp-image-2295\" style=\"width:1198px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/ROY-2-770x1024.jpeg 770w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/ROY-2-226x300.jpeg 226w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/ROY-2-768x1021.jpeg 768w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/ROY-2.jpeg 963w\" sizes=\"auto, (max-width: 770px) 100vw, 770px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"407\" height=\"267\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-14.png\" alt=\"\" class=\"wp-image-2274\" style=\"width:1033px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-14.png 407w, 
https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-14-300x197.png 300w\" sizes=\"auto, (max-width: 407px) 100vw, 407px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"625\" height=\"355\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-11.png\" alt=\"\" class=\"wp-image-2271\" style=\"width:1200px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-11.png 625w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-11-300x170.png 300w\" sizes=\"auto, (max-width: 625px) 100vw, 625px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"586\" height=\"510\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-12.png\" alt=\"\" class=\"wp-image-2272\" style=\"width:1184px;height:auto\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-12.png 586w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-12-300x261.png 300w\" sizes=\"auto, (max-width: 586px) 100vw, 586px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"570\" height=\"111\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-13.png\" alt=\"\" class=\"wp-image-2273\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-13.png 570w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/image-13-300x58.png 300w\" sizes=\"auto, (max-width: 570px) 100vw, 570px\" \/><\/figure>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\" id=\"SVM\"><strong>Support Vector Machine(SVM)<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" 
data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/SVM-PART-A.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of SVM-PART-A.\"><\/object><a id=\"wp-block-file--media-170cc4b5-1313-4606-a5aa-751a5394983d\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/SVM-PART-A.pdf\">SVM-PART-A<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/SVM-PART-A.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-170cc4b5-1313-4606-a5aa-751a5394983d\">Download<\/a><\/div>\n\n\n\n<h4 class=\"wp-block-heading has-text-align-center\"><strong>SVM-PART-B(Simplified explanation)<\/strong><\/h4>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Support-Vector-Machine-SVM-and-Its-Mathematical-Formulation-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Support Vector Machine (SVM) and Its Mathematical Formulation (1).\"><\/object><a id=\"wp-block-file--media-5890204a-bc14-4866-af41-e916ff9186ce\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Support-Vector-Machine-SVM-and-Its-Mathematical-Formulation-1.pdf\">Support Vector Machine (SVM) and Its Mathematical Formulation (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/Support-Vector-Machine-SVM-and-Its-Mathematical-Formulation-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-5890204a-bc14-4866-af41-e916ff9186ce\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" 
data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/SVMSSROY.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of SVMSSROY.\"><\/object><a id=\"wp-block-file--media-3c52a4b7-8eca-4db4-9f95-86a8b4ec8c0b\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/SVMSSROY.pdf\">SVMSSROY<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/SVMSSROY.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-3c52a4b7-8eca-4db4-9f95-86a8b4ec8c0b\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-7 is-layout-flex wp-block-gallery-is-layout-flex\">\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"707\" height=\"1024\" data-id=\"2158\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/1_roy-707x1024-1.jpeg\" alt=\"\" class=\"wp-image-2158\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/1_roy-707x1024-1.jpeg 707w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/1_roy-707x1024-1-207x300.jpeg 207w\" sizes=\"auto, (max-width: 707px) 100vw, 707px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"721\" height=\"1024\" data-id=\"2159\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/2_roy-721x1024-1.jpeg\" alt=\"\" class=\"wp-image-2159\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/2_roy-721x1024-1.jpeg 721w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/2_roy-721x1024-1-211x300.jpeg 211w\" sizes=\"auto, (max-width: 721px) 100vw, 721px\" \/><\/figure>\n<\/figure>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\" id=\"nonlinearsvm\"><\/h2>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped 
wp-block-gallery-8 is-layout-flex wp-block-gallery-is-layout-flex\">\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"727\" height=\"1024\" data-id=\"2160\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/3_roy-727x1024-1.jpeg\" alt=\"\" class=\"wp-image-2160\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/3_roy-727x1024-1.jpeg 727w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/3_roy-727x1024-1-213x300.jpeg 213w\" sizes=\"auto, (max-width: 727px) 100vw, 727px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"718\" height=\"1024\" data-id=\"2161\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/4_roy-718x1024-1.jpeg\" alt=\"\" class=\"wp-image-2161\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/4_roy-718x1024-1.jpeg 718w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/09\/4_roy-718x1024-1-210x300.jpeg 210w\" sizes=\"auto, (max-width: 718px) 100vw, 718px\" \/><\/figure>\n<\/figure>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>SVM-Non Linear<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Non-Linear-SVM-Solution-1-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Non-Linear-SVM-Solution-1 (1).\"><\/object><a id=\"wp-block-file--media-3c804626-9899-4ae3-8560-b7b47011c202\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Non-Linear-SVM-Solution-1-1.pdf\">Non-Linear-SVM-Solution-1 (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Non-Linear-SVM-Solution-1-1.pdf\" class=\"wp-block-file__button wp-element-button\" download 
aria-describedby=\"wp-block-file--media-3c804626-9899-4ae3-8560-b7b47011c202\">Download<\/a><\/div>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-9 is-layout-flex wp-block-gallery-is-layout-flex\">\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"580\" height=\"267\" data-id=\"1694\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/ama1.jpg\" alt=\"\" class=\"wp-image-1694\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/ama1.jpg 580w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/ama1-300x138.jpg 300w\" sizes=\"auto, (max-width: 580px) 100vw, 580px\" \/><\/figure>\n<\/figure>\n\n\n\n<figure class=\"wp-block-gallery has-nested-images columns-default is-cropped wp-block-gallery-10 is-layout-flex wp-block-gallery-is-layout-flex\">\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"541\" height=\"342\" data-id=\"1699\" src=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/1-1.jpg\" alt=\"\" class=\"wp-image-1699\" srcset=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/1-1.jpg 541w, https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/02\/1-1-300x190.jpg 300w\" sizes=\"auto, (max-width: 541px) 100vw, 541px\" \/><\/figure>\n<\/figure>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\"><strong>Decision Tree: C4.5<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" id=\"C4.5\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/1-1_merged-1-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of 1-1_merged-1 (1).\"><\/object><a id=\"wp-block-file--media-c88c4621-84cc-49dc-a302-7837c3180c7a\" 
href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/1-1_merged-1-1.pdf\">1-1_merged-1 (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/1-1_merged-1-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-c88c4621-84cc-49dc-a302-7837c3180c7a\">Download<\/a><\/div>\n\n\n\n<h4 class=\"wp-block-heading has-text-align-center\" id=\"metrics\"><strong>Model Evaluation Metrics: Classification <\/strong><\/h4>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Classification-Metrics-Overview-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Classification Metrics Overview (1).\"><\/object><a id=\"wp-block-file--media-b77f024b-f92f-405d-b653-08b158849434\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Classification-Metrics-Overview-1.pdf\">Classification Metrics Overview (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Classification-Metrics-Overview-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-b77f024b-f92f-405d-b653-08b158849434\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading has-text-align-center\"><strong>Class Imbalance<\/strong><\/h2>\n\n\n\n<p id=\"imbalance\"><strong>Understanding Class Imbalance: Concepts and Challenges<\/strong><\/p>\n\n\n\n<p>Class imbalance is a pervasive issue in many real-world machine learning problems, especially in domains like fraud detection, medical diagnosis, anomaly detection, and rare event prediction, where the number of examples in one class significantly outweighs the others. 
In a typical binary classification scenario, a balanced dataset might contain roughly equal numbers of positive and negative samples, but with class imbalance, one class (majority) dominates the other (minority), leading to a skewed distribution. For instance, in a cancer detection dataset, healthy patients may represent 98% of the data, while only 2% represent the cancer-positive cases. This disparity poses challenges for standard classification algorithms, which tend to be biased towards the majority class, often resulting in high overall accuracy but poor recall for the minority class. Accuracy becomes a misleading metric in such cases, as a model predicting only the majority class can still achieve high accuracy while completely ignoring the minority class. Therefore, metrics like precision, recall, F1-score, area under the ROC curve (AUC-ROC), and confusion matrix analysis are preferred for evaluating performance in imbalanced settings. Moreover, class imbalance affects model learning by minimizing the influence of rare class patterns, leading to underfitting for minority class features. As a result, without proper handling, imbalanced datasets can result in models that are ill-equipped to make meaningful predictions about the minority class, which is often the most important class in critical applications.<\/p>\n\n\n\n<p>To address class imbalance, several techniques can be applied during data preprocessing, algorithm design, and evaluation. <strong>Resampling methods<\/strong> are widely used and include <em>undersampling<\/em> the majority class, <em>oversampling<\/em> the minority class, or employing <em>synthetic data generation<\/em> techniques like <strong>SMOTE<\/strong> (Synthetic Minority Oversampling Technique), which creates new minority class samples by interpolating between existing ones. While undersampling can lead to loss of potentially useful data, oversampling may cause overfitting due to repetition. 
Algorithm-level strategies, such as <em>cost-sensitive learning<\/em>, introduce penalty terms for misclassifying the minority class, thus guiding the model to pay more attention to it. Ensemble methods like <em>Random Forest<\/em>, <em>Boosting<\/em> (especially <em>AdaBoost<\/em> or <em>XGBoost<\/em> with appropriate parameter tuning), and hybrid techniques combining sampling with ensembles have shown effectiveness. Recent advances also include <em>generative models<\/em> and <em>deep learning architectures<\/em> that incorporate class reweighting or use focal loss to concentrate on hard-to-classify minority instances. Additionally, techniques like <em>threshold moving<\/em>, <em>data augmentation<\/em>, and <em>transfer learning<\/em> are gaining popularity in addressing imbalance. It is crucial to select appropriate evaluation metrics and visualize learning performance through tools like precision-recall curves or class-specific confusion matrices to ensure that the model generalizes well across all classes. 
Ultimately, handling class imbalance is not a one-size-fits-all solution\u2014it requires a thoughtful combination of strategies, tailored experimentation, and iterative refinement to develop robust, fair, and reliable machine learning models that perform equitably across all classes, especially the underrepresented ones.<\/p>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Imbalanced-Learning_-Foundations-Algorithms-and-Applications-2013-Wiley-IEEE-Press-10.1002_9781118646106-libgen.li_.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Imbalanced Learning_ Foundations, Algorithms, and Applications (2013, Wiley-IEEE Press) [10.1002_9781118646106] - libgen.li.\"><\/object><a id=\"wp-block-file--media-7fb25c77-3e0b-4066-b110-ba893a96361f\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Imbalanced-Learning_-Foundations-Algorithms-and-Applications-2013-Wiley-IEEE-Press-10.1002_9781118646106-libgen.li_.pdf\">Imbalanced Learning_ Foundations, Algorithms, and Applications (2013, Wiley-IEEE Press) [10.1002_9781118646106] &#8211; libgen.li<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Imbalanced-Learning_-Foundations-Algorithms-and-Applications-2013-Wiley-IEEE-Press-10.1002_9781118646106-libgen.li_.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-7fb25c77-3e0b-4066-b110-ba893a96361f\">Download<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"sampling\"><strong>Sampling method<\/strong><\/h2>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" 
data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Sampling-Methods-for-Imbalance-1-1.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of Sampling Methods for Imbalance (1) (1).\"><\/object><a id=\"wp-block-file--media-aea5e014-6821-4d91-bf5a-de3e6ad15fd2\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Sampling-Methods-for-Imbalance-1-1.pdf\">Sampling Methods for Imbalance (1) (1)<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/Sampling-Methods-for-Imbalance-1-1.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-aea5e014-6821-4d91-bf5a-de3e6ad15fd2\">Download<\/a><\/div>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/ClassImbalancematrial_provided_by_ProfRoy.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of ClassImbalance+matrial_provided_by_ProfRoy.\"><\/object><a id=\"wp-block-file--media-c329cb1e-9221-49bb-a23a-2943df67a5b0\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/ClassImbalancematrial_provided_by_ProfRoy.pdf\">ClassImbalance+matrial_provided_by_ProfRoy<\/a><a href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/03\/ClassImbalancematrial_provided_by_ProfRoy.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-c329cb1e-9221-49bb-a23a-2943df67a5b0\">Download<\/a><\/div>\n\n\n\n<h4 class=\"wp-block-heading\"><strong>Time Series Analysis<\/strong><\/h4>\n\n\n\n<div data-wp-interactive=\"core\/file\" id=\"time\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" 
data=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/05\/TimeSeriesAnalysis.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"Embed of TimeSeriesAnalysis.\"><\/object><a id=\"wp-block-file--media-6c213819-4a96-4d44-832c-fe24ca3a3f42\" href=\"https:\/\/cyberenlightener.com\/wp-content\/uploads\/2025\/05\/TimeSeriesAnalysis.pdf\">TimeSeriesAnalysis<\/a><\/div>\n","protected":false},"excerpt":{"rendered":"<p>Predictive Modeling &#8211; Propensity, Cluster, &amp; Collaborative Filtering Models, Statistical Modeling Regression Model Comparison &#8211; Linear(Fitting a st line using the least square method), Measurement of metrics(Linear Regression),Multiple Linear Regression, Logistic Regression &amp; Non-Linear Regression Regression Trees &amp; Rules AprioriAlgorithm VARIOUS NORMALIZATION TECHNIQUES Transformations to Resolve Skewness Dealing with Missing Values In many situations, certain [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"class_list":["post-1324","page","type-page","status-publish","hentry"],"_links":{"self":[{"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=\/wp\/v2\/pages\/1324","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1324"}],"version-history":[{"count":107,"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=\/wp\/v2\/pages\/1324\/revisions"}],"predecessor-version":[{"id":2312,"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=\/wp\/v2\/pages\/1324\/revisions\/2312"}],"wp:attachment":[{"href":"https:\/\/cyberenlightener.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1324"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}