Program to detect tokens in a C code (original) (raw)

Last Updated : 20 Dec, 2025

Lexical analysis, also known as scanning, is the first phase of a compiler. Its main task is to read the source program and break it into a sequence of meaningful units called tokens. These tokens help the compiler understand the structure of the program and proceed to later phases of compilation. For example:

A C program is made up of different types of tokens. Each token belongs to a specific category such as keywords, identifiers, constants, string literals, operators, or symbols.

Common Types of Tokens in C

Example: For input "int a = b + 1c;", it will identify "int" as a keyword, "a" as an identifier, "=" as an operator, etc.

Approach :

Below is a program that prints all the keywords, operators, valid identifiers, invalid identifiers, integers, and real numbers found in a given piece of C code:

C++ `

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Returns 'true' if the character is a DELIMITER.
// Delimiters are the space character, the operator characters
// + - * / > < = and the punctuation , ; ( ) [ ] { }.
// The string terminator '\0' is NOT a delimiter.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*': case '/':
    case ',': case ';': case '>': case '<': case '=':
    case '(': case ')': case '[': case ']':
    case '{': case '}':
        return (true);
    default:
        return (false);
    }
}

// Returns 'true' if the character is an OPERATOR.
// Only the single-character operators + - * / > < = are recognised.
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return (true);
    default:
        return (false);
    }
}

// Returns 'true' if the string is a VALID IDENTIFIER.
// A valid identifier is non-empty, starts with a letter or underscore and
// continues with letters, digits or underscores only. A NULL or empty
// string is rejected. Keywords are not excluded here; callers test for
// keywords separately.
bool validIdentifier(const char* str)
{
    if (str == NULL || str[0] == '\0')
        return (false);

    for (size_t i = 0; str[i] != '\0'; i++) {
        char ch = str[i];
        bool isLetter = (ch >= 'a' && ch <= 'z')
                        || (ch >= 'A' && ch <= 'Z') || ch == '_';
        bool isDigit = (ch >= '0' && ch <= '9');

        // Digits are allowed everywhere except in the first position.
        if (!isLetter && !(isDigit && i > 0))
            return (false);
    }
    return (true);
}

// Returns 'true' if the string is a KEYWORD.
// The table holds the 32 keywords of C89/C90; the comparison is
// case-sensitive. A NULL string is not a keyword.
bool isKeyword(const char* str)
{
    static const char* const keywords[] = {
        "auto",     "break",  "case",    "char",   "const",    "continue",
        "default",  "do",     "double",  "else",   "enum",     "extern",
        "float",    "for",    "goto",    "if",     "int",      "long",
        "register", "return", "short",   "signed", "sizeof",   "static",
        "struct",   "switch", "typedef", "union",  "unsigned", "void",
        "volatile", "while"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    if (str == NULL)
        return (false);

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return (true);
    }
    return (false);
}

// Returns 'true' if the string is an INTEGER.
// Accepts an optional leading '-' followed by one or more decimal digits.
// A NULL string, an empty string and a lone "-" are rejected.
bool isInteger(const char* str)
{
    if (str == NULL)
        return (false);

    // Skip the sign, if any; everything after it must be digits.
    size_t i = (str[0] == '-') ? 1 : 0;

    if (str[i] == '\0')
        return (false);

    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9')
            return (false);
    }
    return (true);
}

// Returns 'true' if the string is a REAL NUMBER.
// Accepts an optional leading '-', then decimal digits containing exactly
// one '.', with at least one digit overall ("3.14", "-0.5", ".5", "1.").
// Rejects NULL, "", ".", "-." and anything with a second '.'.
bool isRealNumber(const char* str)
{
    if (str == NULL)
        return (false);

    bool hasDecimal = false;
    bool hasDigit = false;

    for (size_t i = (str[0] == '-') ? 1 : 0; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal)
                return (false); // a second decimal point
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return (false);
        }
    }
    return (hasDecimal && hasDigit);
}

// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
// newly allocated, NUL-terminated buffer.
// - right == left - 1 yields an empty string.
// - Returns NULL if str is NULL, left is negative, the range is inverted
//   by more than one position, or the allocation fails.
// The caller owns the returned buffer and must free() it. The caller is
// also responsible for keeping both indices inside the string.
char* subString(const char* str, int left, int right)
{
    if (str == NULL || left < 0 || right < left - 1)
        return (NULL);

    size_t count = (right < left) ? 0 : (size_t)(right - left) + 1;
    char* subStr = (char*)malloc(count + 1);

    if (subStr == NULL)
        return (NULL);

    memcpy(subStr, str + left, count);
    subStr[count] = '\0';
    return (subStr);
}

// Parsing the input STRING. void parse(char* str) { int left = 0, right = 0; int len = strlen(str);

while (right <= len && left <= right) {
    if (isDelimiter(str[right]) == false)
        right++;

    if (isDelimiter(str[right]) == true && left == right) {
        if (isOperator(str[right]) == true)
            printf("'%c' IS AN OPERATOR\n", str[right]);

        right++;
        left = right;
    } else if (isDelimiter(str[right]) == true && left != right
               || (right == len && left != right)) {
        char* subStr = subString(str, left, right - 1);

        if (isKeyword(subStr) == true)
            printf("'%s' IS A KEYWORD\n", subStr);

        else if (isInteger(subStr) == true)
            printf("'%s' IS AN INTEGER\n", subStr);

        else if (isRealNumber(subStr) == true)
            printf("'%s' IS A REAL NUMBER\n", subStr);

        else if (validIdentifier(subStr) == true
                 && isDelimiter(str[right - 1]) == false)
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);

        else if (validIdentifier(subStr) == false
                 && isDelimiter(str[right - 1]) == false)
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        left = right;
    }
}
return;

}

// DRIVER FUNCTION int main() { // maximum length of string is 100 here char str[100] = "int a = b + 1c; ";

parse(str); // calling the parse function

return (0);

}

C

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Returns 'true' if the character is a DELIMITER.
// Delimiters are the space character, the operator characters
// + - * / > < = and the punctuation , ; ( ) [ ] { }.
// The string terminator '\0' is NOT a delimiter.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*': case '/':
    case ',': case ';': case '>': case '<': case '=':
    case '(': case ')': case '[': case ']':
    case '{': case '}':
        return (true);
    default:
        return (false);
    }
}

// Returns 'true' if the character is an OPERATOR.
// Only the single-character operators + - * / > < = are recognised.
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return (true);
    default:
        return (false);
    }
}

// Returns 'true' if the string is a VALID IDENTIFIER.
// A valid identifier is non-empty, starts with a letter or underscore and
// continues with letters, digits or underscores only. A NULL or empty
// string is rejected. Keywords are not excluded here; callers test for
// keywords separately.
bool validIdentifier(const char* str)
{
    if (str == NULL || str[0] == '\0')
        return (false);

    for (size_t i = 0; str[i] != '\0'; i++) {
        char ch = str[i];
        bool isLetter = (ch >= 'a' && ch <= 'z')
                        || (ch >= 'A' && ch <= 'Z') || ch == '_';
        bool isDigit = (ch >= '0' && ch <= '9');

        // Digits are allowed everywhere except in the first position.
        if (!isLetter && !(isDigit && i > 0))
            return (false);
    }
    return (true);
}

// Returns 'true' if the string is a KEYWORD.
// The table holds the 32 keywords of C89/C90; the comparison is
// case-sensitive. A NULL string is not a keyword.
bool isKeyword(const char* str)
{
    static const char* const keywords[] = {
        "auto",     "break",  "case",    "char",   "const",    "continue",
        "default",  "do",     "double",  "else",   "enum",     "extern",
        "float",    "for",    "goto",    "if",     "int",      "long",
        "register", "return", "short",   "signed", "sizeof",   "static",
        "struct",   "switch", "typedef", "union",  "unsigned", "void",
        "volatile", "while"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    if (str == NULL)
        return (false);

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return (true);
    }
    return (false);
}

// Returns 'true' if the string is an INTEGER.
// Accepts an optional leading '-' followed by one or more decimal digits.
// A NULL string, an empty string and a lone "-" are rejected.
bool isInteger(const char* str)
{
    if (str == NULL)
        return (false);

    // Skip the sign, if any; everything after it must be digits.
    size_t i = (str[0] == '-') ? 1 : 0;

    if (str[i] == '\0')
        return (false);

    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9')
            return (false);
    }
    return (true);
}

// Returns 'true' if the string is a REAL NUMBER.
// Accepts an optional leading '-', then decimal digits containing exactly
// one '.', with at least one digit overall ("3.14", "-0.5", ".5", "1.").
// Rejects NULL, "", ".", "-." and anything with a second '.'.
bool isRealNumber(const char* str)
{
    if (str == NULL)
        return (false);

    bool hasDecimal = false;
    bool hasDigit = false;

    for (size_t i = (str[0] == '-') ? 1 : 0; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal)
                return (false); // a second decimal point
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return (false);
        }
    }
    return (hasDecimal && hasDigit);
}

// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
// newly allocated, NUL-terminated buffer.
// - right == left - 1 yields an empty string.
// - Returns NULL if str is NULL, left is negative, the range is inverted
//   by more than one position, or the allocation fails.
// The caller owns the returned buffer and must free() it. The caller is
// also responsible for keeping both indices inside the string.
char* subString(const char* str, int left, int right)
{
    if (str == NULL || left < 0 || right < left - 1)
        return (NULL);

    size_t count = (right < left) ? 0 : (size_t)(right - left) + 1;
    char* subStr = malloc(count + 1);

    if (subStr == NULL)
        return (NULL);

    memcpy(subStr, str + left, count);
    subStr[count] = '\0';
    return (subStr);
}

// Parsing the input STRING.
// Walks the string once and prints one line per token:
//   - operator characters are reported as operators,
//   - other delimiters (space , ; brackets) are skipped silently,
//   - every run of non-delimiter characters is classified as a keyword,
//     integer, real number, valid identifier or invalid identifier.
// The scan never reads beyond the terminating '\0', and every token buffer
// is freed after it has been printed.
void parse(char* str)
{
    if (str == NULL)
        return;

    int len = (int)strlen(str);
    int left = 0;

    while (left < len) {
        // A delimiter is a one-character token of its own.
        if (isDelimiter(str[left]) == true) {
            if (isOperator(str[left]) == true)
                printf("'%c' IS AN OPERATOR\n", str[left]);
            left++;
            continue;
        }

        // Extend the token up to the next delimiter or the end of input.
        int right = left;
        while (right < len && isDelimiter(str[right]) == false)
            right++;

        char* subStr = subString(str, left, right - 1);
        if (subStr == NULL)
            return; // out of memory: stop rather than dereference NULL

        if (isKeyword(subStr) == true)
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr) == true)
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr) == true)
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr) == true)
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);

        free(subStr);
        left = right;
    }
}

// DRIVER FUNCTION
int main(void)
{
    // maximum length of string is 100 here
    char str[100] = "int a = b + 1c; ";

    parse(str); // calling the parse function

    return (0);
}

Java

import java.util.Arrays;

// A tiny lexer for a subset of C: splits a line of source text at delimiter
// characters and prints the category of every token it finds.
public class Parser {
    // Characters that terminate a token.
    private static final String DELIMITERS = " +-*/,;><=()[]{}";

    // The delimiters that are also reported as operators.
    private static final String OPERATORS = "+-*/><=";

    // The 32 keywords of C89/C90.
    private static final java.util.List<String> KEYWORDS = Arrays.asList(
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if",
        "int", "long", "register", "return", "short", "signed", "sizeof",
        "static", "struct", "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while");

    // Returns true if ch is a delimiter (space, operator or punctuation).
    public static boolean isDelimiter(char ch) {
        return DELIMITERS.indexOf(ch) != -1;
    }

    // Returns true if ch is one of the operators + - * / > < =.
    public static boolean isOperator(char ch) {
        return OPERATORS.indexOf(ch) != -1;
    }

    // Returns true if str is non-empty, starts with an ASCII letter or
    // underscore, and continues with ASCII letters, digits or underscores.
    public static boolean validIdentifier(String str) {
        if (str == null || str.isEmpty())
            return false;
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            boolean letter = ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
            // Digits are allowed everywhere except in the first position.
            if (!letter && !(isAsciiDigit(ch) && i > 0))
                return false;
        }
        return true;
    }

    // Returns true if str is a C keyword (case-sensitive).
    public static boolean isKeyword(String str) {
        return KEYWORDS.contains(str);
    }

    // Returns true if str is an optional leading '-' followed by one or
    // more decimal digits. A lone "-" is not an integer.
    public static boolean isInteger(String str) {
        if (str == null || str.isEmpty())
            return false;
        int start = str.charAt(0) == '-' ? 1 : 0;
        if (start == str.length())
            return false;
        for (int i = start; i < str.length(); i++) {
            if (!isAsciiDigit(str.charAt(i)))
                return false;
        }
        return true;
    }

    // Returns true if str is an optional leading '-' followed by decimal
    // digits that contain exactly one '.', with at least one digit overall.
    public static boolean isRealNumber(String str) {
        if (str == null || str.isEmpty())
            return false;
        boolean hasDecimal = false;
        boolean hasDigit = false;
        for (int i = str.charAt(0) == '-' ? 1 : 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch == '.') {
                if (hasDecimal)
                    return false; // a second decimal point
                hasDecimal = true;
            } else if (isAsciiDigit(ch)) {
                hasDigit = true;
            } else {
                return false;
            }
        }
        return hasDecimal && hasDigit;
    }

    // Returns the characters str[left..right], both bounds inclusive.
    public static String subString(String str, int left, int right) {
        return str.substring(left, right + 1);
    }

    // Scans str once and prints one line per token. Operators are reported;
    // other delimiters are skipped silently; every run of non-delimiter
    // characters is classified as keyword, integer, real number, valid
    // identifier or invalid identifier. The scan never indexes past the
    // last character of str.
    public static void parse(String str) {
        int len = str.length();
        int left = 0;

        while (left < len) {
            char current = str.charAt(left);

            // A delimiter is a one-character token of its own.
            if (isDelimiter(current)) {
                if (isOperator(current))
                    System.out.println("'" + current + "' IS AN OPERATOR");
                left++;
                continue;
            }

            // Extend the token up to the next delimiter or the end of input.
            int right = left;
            while (right < len && !isDelimiter(str.charAt(right)))
                right++;

            String subStr = subString(str, left, right - 1);

            if (isKeyword(subStr))
                System.out.println("'" + subStr + "' IS A KEYWORD");
            else if (isInteger(subStr))
                System.out.println("'" + subStr + "' IS AN INTEGER");
            else if (isRealNumber(subStr))
                System.out.println("'" + subStr + "' IS A REAL NUMBER");
            else if (validIdentifier(subStr))
                System.out.println("'" + subStr + "' IS A VALID IDENTIFIER");
            else
                System.out.println("'" + subStr + "' IS NOT A VALID IDENTIFIER");

            left = right;
        }
    }

    public static void main(String[] args) {
        String str = "int a = b + 1c; ";
        parse(str);
    }

    // Returns true for the ASCII digits '0'..'9' only.
    private static boolean isAsciiDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }
}

Python

def is_delimiter(ch):
    """Return True if `ch` occurs in the set of token-delimiting characters.

    The delimiters are the space, the operator characters ``+ - * / > < =``
    and the punctuation ``, ; ( ) [ ] { }``.
    """
    delimiters = ' +-*/ ,;><=()[]{}'
    return delimiters.find(ch) != -1

def is_operator(ch):
    """Return True if `ch` occurs in the operator characters ``+ - * / > < =``."""
    operators = '+-*/><='
    return operators.find(ch) != -1

def valid_identifier(str):
    """Return True if `str` is a syntactically valid C identifier.

    The string must be non-empty, start with an ASCII letter or underscore,
    and continue with ASCII letters, digits or underscores only. Keywords
    are not excluded here; callers test for keywords separately.
    """
    if not str:
        return False

    def is_letter(ch):
        return ch == '_' or 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'

    # Digits are allowed everywhere except in the first position.
    if not is_letter(str[0]):
        return False
    return all(is_letter(ch) or '0' <= ch <= '9' for ch in str[1:])

def is_keyword(str):
    """Return True if `str` is one of the 32 C89/C90 keywords (case-sensitive)."""
    keywords = (
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if",
        "int", "long", "register", "return", "short", "signed", "sizeof",
        "static", "struct", "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while",
    )
    return str in keywords

def is_integer(str):
    """Return True if `str` is an optional leading '-' plus one or more ASCII digits.

    An empty string and a lone "-" are rejected.
    """
    if not str:
        return False
    # Drop the sign, if any; what remains must be a non-empty run of digits.
    digits = str[1:] if str[0] == '-' else str
    return bool(digits) and all('0' <= ch <= '9' for ch in digits)

def is_real_number(str):
    """Return True if `str` is a decimal real-number literal.

    Accepts an optional leading '-', then ASCII digits containing exactly one
    '.', with at least one digit overall ("3.14", "-0.5", ".5", "1.").
    Rejects "", ".", "-." and anything with a second '.'.
    """
    if not str:
        return False
    body = str[1:] if str[0] == '-' else str
    if body.count('.') != 1:
        return False
    # With the single decimal point removed, only digits may remain.
    digits = body.replace('.', '', 1)
    return bool(digits) and all('0' <= ch <= '9' for ch in digits)

def sub_string(str, left, right):
    """Return the slice of `str` from index `left` through index `right`, inclusive."""
    stop = right + 1
    return str[left:stop]

def parse(str):
    """Scan `str` once and print one line per token.

    Operator characters are reported as operators; other delimiters (space,
    comma, semicolon, brackets) are skipped silently. Every run of
    non-delimiter characters is classified as a keyword, integer, real
    number, valid identifier or invalid identifier. The scan never indexes
    past the last character of `str`.
    """
    length = len(str)
    left = 0

    while left < length:
        # A delimiter is a one-character token of its own.
        if is_delimiter(str[left]):
            if is_operator(str[left]):
                print(f"'{str[left]}' IS AN OPERATOR")
            left += 1
            continue

        # Extend the token up to the next delimiter or the end of input.
        right = left
        while right < length and not is_delimiter(str[right]):
            right += 1

        sub_str = sub_string(str, left, right - 1)

        if is_keyword(sub_str):
            print(f"'{sub_str}' IS A KEYWORD")
        elif is_integer(sub_str):
            print(f"'{sub_str}' IS AN INTEGER")
        elif is_real_number(sub_str):
            print(f"'{sub_str}' IS A REAL NUMBER")
        elif valid_identifier(sub_str):
            print(f"'{sub_str}' IS A VALID IDENTIFIER")
        else:
            print(f"'{sub_str}' IS NOT A VALID IDENTIFIER")

        left = right

# Run the demo only when the file is executed as a script.
if __name__ == '__main__':
    sample = 'int a = b + 1c; '
    parse(sample)

`

**Output:**

'int' IS A KEYWORD
'a' IS A VALID IDENTIFIER
'=' IS AN OPERATOR
'b' IS A VALID IDENTIFIER
'+' IS AN OPERATOR
'1c' IS NOT A VALID IDENTIFIER